Blob Blame History Raw
commit 0fa5ce2c14fa36610630469c14c07537eb4f4807
Author: Andrew Beekhof <andrew@beekhof.net>
Date:   Wed Oct 1 16:56:59 2014 +1000

    Import: pacemaker-rollup-be1e835

diff --git a/attrd/Makefile.am b/attrd/Makefile.am
index 802a3fa..9d5e223 100644
--- a/attrd/Makefile.am
+++ b/attrd/Makefile.am
@@ -32,25 +32,12 @@ attrd_LDADD	= $(top_builddir)/lib/cluster/libcrmcluster.la 		\
 		$(top_builddir)/lib/cib/libcib.la			\
 		$(CLUSTERLIBS)
 
-if BUILD_HEARTBEAT_SUPPORT
-attrd_SOURCES	+= legacy.c
-else
-
-if BUILD_CS_SUPPORT
-
-if BUILD_CS_PLUGIN
-attrd_SOURCES	+= legacy.c
-else
-# Only build the new version where CPG is exclusively used for communication
+if BUILD_ATOMIC_ATTRD
 attrd_SOURCES	+= main.c commands.c
-endif
-
 else
 attrd_SOURCES	+= legacy.c
 endif
 
-endif
-
 clean-generic:
 	rm -f *.log *.debug *.xml *~
 
diff --git a/attrd/commands.c b/attrd/commands.c
index 038e7e4..c48ef1b 100644
--- a/attrd/commands.c
+++ b/attrd/commands.c
@@ -17,6 +17,8 @@
  */
 #include <crm_internal.h>
 
+#include <sys/types.h>
+#include <regex.h>
 #include <glib.h>
 
 #include <crm/msg_xml.h>
@@ -63,7 +65,7 @@ typedef struct attribute_value_s {
 
 void write_attribute(attribute_t *a);
 void write_or_elect_attribute(attribute_t *a);
-void attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter);
+void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
 void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
 void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source);
 
@@ -191,16 +193,41 @@ attrd_client_message(crm_client_t *client, xmlNode *xml)
         char *host = crm_element_value_copy(xml, F_ATTRD_HOST);
         const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
         const char *value = crm_element_value(xml, F_ATTRD_VALUE);
+        const char *regex = crm_element_value(xml, F_ATTRD_REGEX);
 
-        a = g_hash_table_lookup(attributes, attr);
+        if(attr == NULL && regex) {
+            GHashTableIter aIter;
+            regex_t *r_patt = calloc(1, sizeof(regex_t));
+
+            crm_debug("Setting %s to %s", regex, value);
+            if (regcomp(r_patt, regex, REG_EXTENDED)) {
+                crm_err("Bad regex '%s' for update", regex);
+                regfree(r_patt);
+                free(r_patt);
+                return;
+            }
 
-        if(host == NULL) {
+            g_hash_table_iter_init(&aIter, attributes);
+            while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
+                int status = regexec(r_patt, attr, 0, NULL, 0);
+
+                if(status == 0) {
+                    crm_trace("Matched %s with %s", attr, regex);
+                    crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr);
+                    send_attrd_message(NULL, xml);
+                }
+            }
+            return;
+
+        } else if(host == NULL) {
             crm_trace("Inferring host");
             host = strdup(attrd_cluster->uname);
             crm_xml_add(xml, F_ATTRD_HOST, host);
             crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid);
         }
 
+        a = g_hash_table_lookup(attributes, attr);
+
         if (value) {
             int offset = 1;
             int int_value = 0;
@@ -254,6 +281,7 @@ attrd_client_message(crm_client_t *client, xmlNode *xml)
     }
 
     if(broadcast) {
+        /* Ends up at attrd_peer_message() */
         send_attrd_message(NULL, xml);
     }
 }
@@ -265,6 +293,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
     const char *v = crm_element_value(xml, F_ATTRD_VERSION);
     const char *op = crm_element_value(xml, F_ATTRD_TASK);
     const char *election_op = crm_element_value(xml, F_CRM_TASK);
+    const char *host = crm_element_value(xml, F_ATTRD_HOST);
 
     if(election_op) {
         enum election_result rc = 0;
@@ -293,7 +322,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
             const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
 
             crm_trace("Compatibility update of %s from %s", name, peer->uname);
-            attrd_peer_update(peer, xml, FALSE);
+            attrd_peer_update(peer, xml, host, FALSE);
 
         } else if(safe_str_eq(op, "flush")) {
             const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
@@ -336,13 +365,12 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
     }
 
     if(safe_str_eq(op, "update")) {
-        attrd_peer_update(peer, xml, FALSE);
+        attrd_peer_update(peer, xml, host, FALSE);
 
     } else if(safe_str_eq(op, "sync")) {
         attrd_peer_sync(peer, xml);
 
     } else if(safe_str_eq(op, "peer-remove")) {
-        const char *host = crm_element_value(xml, F_ATTRD_HOST);
         attrd_peer_remove(0, host, TRUE, peer->uname);
 
     } else if(safe_str_eq(op, "sync-response")
@@ -351,7 +379,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
 
         crm_notice("Processing %s from %s", op, peer->uname);
         for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
-            attrd_peer_update(peer, child, TRUE);
+            host = crm_element_value(child, F_ATTRD_HOST);
+            attrd_peer_update(peer, child, host, TRUE);
         }
     }
 }
@@ -409,12 +438,11 @@ attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const cha
 }
 
 void
-attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter)
+attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
 {
     bool changed = FALSE;
     attribute_value_t *v = NULL;
 
-    const char *host = crm_element_value(xml, F_ATTRD_HOST);
     const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
     const char *value = crm_element_value(xml, F_ATTRD_VALUE);
 
@@ -424,6 +452,19 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter)
         a = create_attribute(xml);
     }
 
+    if(host == NULL) {
+        GHashTableIter vIter;
+        g_hash_table_iter_init(&vIter, a->values);
+
+        crm_debug("Setting %s for all hosts to %s", attr, value);
+
+        xml_remove_prop(xml, F_ATTRD_HOST_ID);
+        while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
+            attrd_peer_update(peer, xml, host, filter);
+        }
+        return;
+    }
+
     v = g_hash_table_lookup(a->values, host);
 
     if(v == NULL) {
diff --git a/cib/messages.c b/cib/messages.c
index 4b79912..9c66349 100644
--- a/cib/messages.c
+++ b/cib/messages.c
@@ -292,6 +292,11 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml
             crm_xml_add(up, F_TYPE, "cib");
             crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE);
             crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version));
+            crm_xml_add(up, F_CIB_DELEGATED, host);
+            crm_xml_add(up, F_CIB_CLIENTID, crm_element_value(req, F_CIB_CLIENTID));
+            crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS));
+            crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID));
+
             send_cluster_message(NULL, crm_msg_cib, up, FALSE);
             free_xml(up);
 
diff --git a/configure.ac b/configure.ac
index 40adffe..1edff40 100644
--- a/configure.ac
+++ b/configure.ac
@@ -75,6 +75,7 @@ CC_IN_CONFIGURE=yes
 export CC_IN_CONFIGURE
 
 LDD=ldd
+BUILD_ATOMIC_ATTRD=1
 
 dnl ========================================================================
 dnl Compiler characteristics
@@ -1260,6 +1261,7 @@ case $SUPPORT_HEARTBEAT in
 	dnl objdump -x ${libdir}/libccmclient.so | grep SONAME | awk '{print $2}'
 	AC_DEFINE_UNQUOTED(CCM_LIBRARY, "libccmclient.so.1", Library to load for ccm support)
 	AC_DEFINE_UNQUOTED(HEARTBEAT_LIBRARY, "libhbclient.so.1", Library to load for heartbeat support)
+	BUILD_ATOMIC_ATTRD=0
    else
 	SUPPORT_HEARTBEAT=0
    fi
@@ -1341,6 +1343,7 @@ SUPPORT_PLUGIN=0
 if test $SUPPORT_CS = 1 -a x$HAVE_confdb = x1; then
     dnl Need confdb to support cman and the plugins
     SUPPORT_PLUGIN=1
+    BUILD_ATOMIC_ATTRD=0
     LCRSODIR=`$PKGCONFIG corosync --variable=lcrsodir`
     STACKS="$STACKS corosync-plugin"
     COROSYNC_LIBS="$COROSYNC_LIBS $confdb_LIBS"
@@ -1382,6 +1385,9 @@ AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
 AM_CONDITIONAL(BUILD_CS_PLUGIN, test $SUPPORT_PLUGIN = 1)
 AM_CONDITIONAL(BUILD_CMAN, test $SUPPORT_CMAN = 1)
 
+AM_CONDITIONAL(BUILD_ATOMIC_ATTRD, test $BUILD_ATOMIC_ATTRD = 1)
+AC_DEFINE_UNQUOTED(HAVE_ATOMIC_ATTRD, $BUILD_ATOMIC_ATTRD, Support the new atomic attrd)
+
 AC_SUBST(SUPPORT_CMAN)
 AC_SUBST(SUPPORT_CS)
 
@@ -1401,6 +1407,9 @@ else
     PCMK_FEATURES="$PCMK_FEATURES $STACKS"
 fi
 
+if test ${BUILD_ATOMIC_ATTRD} = 1; then
+    PCMK_FEATURES="$PCMK_FEATURES atomic-attrd"
+fi
 AC_SUBST(CLUSTERLIBS)
 AC_SUBST(LCRSODIR)
 
@@ -1871,6 +1880,7 @@ tools/Makefile							\
 	tools/crm_report					\
         tools/report.common                                     \
 	tools/cibsecret						\
+	tools/crm_mon.upstart					\
 xml/Makefile							\
 lib/gnu/Makefile						\
 		)
diff --git a/crmd/lrm.c b/crmd/lrm.c
index db0bffb..44634fb 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1162,7 +1162,7 @@ get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg,
         if (!rsc) {
             fsa_data_t *msg_data = NULL;
 
-            crm_err("Could not add resource %s to LRM", id);
+            crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
             register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
         }
     }
@@ -1175,13 +1175,17 @@ delete_resource(lrm_state_t * lrm_state,
                 const char *id,
                 lrmd_rsc_info_t * rsc,
                 GHashTableIter * gIter,
-                const char *sys, const char *host, const char *user, ha_msg_input_t * request)
+                const char *sys,
+                const char *host,
+                const char *user,
+                ha_msg_input_t * request,
+                gboolean unregister)
 {
     int rc = pcmk_ok;
 
     crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
 
-    if (rsc) {
+    if (rsc && unregister) {
         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
     }
 
@@ -1224,6 +1228,7 @@ do_lrm_invoke(long long action,
     const char *user_name = NULL;
     const char *target_node = NULL;
     gboolean is_remote_node = FALSE;
+    gboolean crm_rsc_delete = FALSE;
 
     if (input->xml != NULL) {
         /* Remote node operations are routed here to their remote connections */
@@ -1259,6 +1264,8 @@ do_lrm_invoke(long long action,
     crm_trace("LRM command from: %s", from_sys);
 
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
+        /* remember this delete op came from crm_resource */
+        crm_rsc_delete = TRUE;
         operation = CRMD_ACTION_DELETE;
 
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
@@ -1370,13 +1377,17 @@ do_lrm_invoke(long long action,
     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
         GHashTableIter gIter;
         rsc_history_t *entry = NULL;
+        gboolean unregister = is_remote_lrmd_ra(NULL, NULL, entry->id) ? FALSE : TRUE;
 
         crm_notice("Forcing the status of all resources to be redetected");
 
         g_hash_table_iter_init(&gIter, lrm_state->resource_history);
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
+            /* Only unregister the resource during a reprobe if it is not a remote
+             * connection resource. Otherwise, unregistering the connection would
+             * terminate the remote node's membership. */
             delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
-                            user_name, NULL);
+                            user_name, NULL, unregister);
         }
 
         /* Now delete the copy in the CIB */
@@ -1499,6 +1510,7 @@ do_lrm_invoke(long long action,
             free(op_key);
 
         } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) {
+            gboolean unregister = TRUE;
 
 #if ENABLE_ACL
             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
@@ -1523,7 +1535,11 @@ do_lrm_invoke(long long action,
                 return;
             }
 #endif
-            delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input);
+            if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
+                unregister = FALSE;
+            }
+
+            delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
 
         } else if (rsc != NULL) {
             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index 98f59c8..f3dedeb 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -251,6 +251,8 @@ connection_takeover_timeout_cb(gpointer data)
     crm_debug("takeover event timed out for node %s", cmd->rsc_id);
     cmd->takeover_timeout_id = 0;
 
+    lrm_state = lrm_state_find(cmd->rsc_id);
+
     handle_remote_ra_stop(lrm_state, cmd);
     free_cmd(cmd);
 
@@ -379,6 +381,11 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
 
         } else {
+
+            if (safe_str_eq(cmd->action, "start")) {
+                /* clear PROBED value if it happens to be set after start completes. */
+                update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
+            }
             lrm_state_reset_tables(lrm_state);
             cmd->rc = PCMK_OCF_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
index 926996b..a3aa78b 100644
--- a/crmd/te_actions.c
+++ b/crmd/te_actions.c
@@ -546,17 +546,26 @@ te_update_job_count(crm_action_t * action, int offset)
         return;
     }
 
-    if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
+    /* If we have a router node, the action is being performed on a
+     * remote node. For now, we count all actions occurring on a
+     * remote node against the job list of the cluster node hosting
+     * the connection resource. */
+    target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+    if ((target == NULL) &&
+        (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
+
         const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
         const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
 
         te_update_job_count_on(t1, offset, TRUE);
         te_update_job_count_on(t2, offset, TRUE);
-
-    } else {
-
-        te_update_job_count_on(target, offset, FALSE);
+        return;
+    } else if (target == NULL) {
+        target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     }
+
+    te_update_job_count_on(target, offset, FALSE);
 }
 
 static gboolean
@@ -597,6 +606,8 @@ te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const ch
         }
     }
 
+    crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
+
     return TRUE;
 }
 
@@ -611,7 +622,15 @@ te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
         return TRUE;
     }
 
-    if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
+    /* If we have a router node, the action is being performed on a
+     * remote node. For now, we count all actions occurring on a
+     * remote node against the job list of the cluster node hosting
+     * the connection resource. */
+    target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+
+    if ((target == NULL) &&
+        (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
+
         target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
         if(te_should_perform_action_on(graph, action, target) == FALSE) {
             return FALSE;
@@ -619,7 +638,7 @@ te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
 
         target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
 
-    } else {
+    } else if (target == NULL) {
         target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
     }
 
diff --git a/crmd/te_events.c b/crmd/te_events.c
index afe3072..b81a13e 100644
--- a/crmd/te_events.c
+++ b/crmd/te_events.c
@@ -161,10 +161,6 @@ update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int targe
         do_update = TRUE;
         value = failed_stop_offset;
 
-    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
-        do_update = TRUE;
-        value = failed_stop_offset;
-
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
         do_update = TRUE;
 
diff --git a/crmd/throttle.c b/crmd/throttle.c
index 04a3cf1..6e853ae 100644
--- a/crmd/throttle.c
+++ b/crmd/throttle.c
@@ -430,7 +430,7 @@ throttle_mode(void)
     unsigned int blocked = 0;
     enum throttle_state_e mode = throttle_none;
 
-#ifndef ON_SOLARIS
+#ifdef ON_SOLARIS
     return throttle_none;
 #endif
 
@@ -508,44 +508,41 @@ static void
 throttle_send_command(enum throttle_state_e mode)
 {
     xmlNode *xml = NULL;
+    static enum throttle_state_e last = -1;
 
-    xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
-    crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
-    crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
+    if(mode != last) {
+        crm_info("New throttle mode: %.4x (was %.4x)", mode, last);
+        last = mode;
 
-    send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
-    free_xml(xml);
+        xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+        crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
+        crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
 
-    crm_info("Updated throttle state to %.4x", mode);
+        send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
+        free_xml(xml);
+    }
 }
 
 static gboolean
 throttle_timer_cb(gpointer data)
 {
     static bool send_updates = FALSE;
-    static enum throttle_state_e last = -1;
-
     enum throttle_state_e now = throttle_none;
 
-    if(send_updates == FALSE) {
-        /* Optimize for the true case */
-        if(compare_version(fsa_our_dc_version, "3.0.8") < 0) {
-            crm_trace("DC version %s doesn't support throttling", fsa_our_dc_version);
-
-        } else {
-            send_updates = TRUE;
-        }
-    }
-
     if(send_updates) {
         now = throttle_mode();
-    }
+        throttle_send_command(now);
+
+    } else if(compare_version(fsa_our_dc_version, "3.0.8") < 0) {
+        /* Optimize for the true case */
+        crm_trace("DC version %s doesn't support throttling", fsa_our_dc_version);
 
-    if(send_updates && now != last) {
-        crm_debug("New throttle mode: %.4x (was %.4x)", now, last);
+    } else {
+        send_updates = TRUE;
+        now = throttle_mode();
         throttle_send_command(now);
-        last = now;
     }
+
     return TRUE;
 }
 
@@ -595,9 +592,11 @@ throttle_update_job_max(const char *preference)
 void
 throttle_init(void)
 {
-    throttle_records = g_hash_table_new_full(
-        crm_str_hash, g_str_equal, NULL, throttle_record_free);
-    throttle_timer = mainloop_timer_add("throttle", 30* 1000, TRUE, throttle_timer_cb, NULL);
+    if(throttle_records == NULL) {
+        throttle_records = g_hash_table_new_full(
+            crm_str_hash, g_str_equal, NULL, throttle_record_free);
+        throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
+    }
 
     throttle_update_job_max(NULL);
     mainloop_timer_start(throttle_timer);
diff --git a/cts/CTS.py b/cts/CTS.py
index 04189f2..f4198c4 100644
--- a/cts/CTS.py
+++ b/cts/CTS.py
@@ -225,10 +225,13 @@ class CtsLab:
 
 class NodeStatus:
     def __init__(self, env):
-        pass
+        self.Env = env
 
     def IsNodeBooted(self, node):
         '''Return TRUE if the given node is booted (responds to pings)'''
+        if self.Env["docker"]:
+            return RemoteFactory().getInstance()("localhost", "docker inspect --format {{.State.Running}} %s | grep -q true" % node, silent=True) == 0
+
         return RemoteFactory().getInstance()("localhost", "ping -nq -c1 -w1 %s" % node, silent=True) == 0
 
     def IsSshdUp(self, node):
@@ -442,6 +445,9 @@ class ClusterManager(UserDict):
             self.debug("Quorum: %d Len: %d" % (q, len(self.Env["nodes"])))
             return peer_list
 
+        for n in self.Env["nodes"]:
+            peer_state[n] = "unknown"
+
         # Now see if any states need to be updated
         self.debug("looking for: " + repr(stonith.regexes))
         shot = stonith.look(0)
@@ -457,7 +463,8 @@ class ClusterManager(UserDict):
                     peer_state[peer] = "complete"
                     self.__instance_errorstoignore.append(self.templates["Pat:Fencing_ok"] % peer)
 
-                elif re.search(self.templates["Pat:Fencing_start"] % n, shot):
+                elif peer_state[n] != "complete" and re.search(self.templates["Pat:Fencing_start"] % n, shot):
+                    # TODO: Correctly detect multiple fencing operations for the same host
                     peer = n
                     peer_state[peer] = "in-progress"
                     self.__instance_errorstoignore.append(self.templates["Pat:Fencing_start"] % peer)
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
index 314c347..9b336a5 100755
--- a/cts/CTSlab.py
+++ b/cts/CTSlab.py
@@ -107,9 +107,9 @@ if __name__ == '__main__':
 
     if Environment["ListTests"] == 1:
         Tests = TestList(cm, Audits)
-        Environment.log("Total %d tests"%len(Tests))
+        LogFactory().log("Total %d tests"%len(Tests))
         for test in Tests :
-            Environment.log(str(test.name));
+            LogFactory().log(str(test.name));
         sys.exit(0)
 
     elif len(Environment["tests"]) == 0:
diff --git a/cts/CTStests.py b/cts/CTStests.py
index 918dff0..cd5b7ce 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -83,6 +83,7 @@ class CTSTest:
         self.passed = 1
         self.is_loop = 0
         self.is_unsafe = 0
+        self.is_docker_unsafe = 0
         self.is_experimental = 0
         self.is_container = 0
         self.is_valgrind = 0
@@ -224,6 +225,8 @@ class CTSTest:
             return 0
         elif self.is_experimental and not self.Env["experimental-tests"]:
             return 0
+        elif self.is_docker_unsafe and self.Env["docker"]:
+            return 0
         elif self.is_container and not self.Env["container-tests"]:
             return 0
         elif self.Env["benchmark"] and self.benchmark == 0:
@@ -1359,6 +1362,8 @@ class ComponentFail(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "ComponentFail"
+        # TODO make this work correctly in docker.
+        self.is_docker_unsafe = 1
         self.startall = SimulStartLite(cm)
         self.complist = cm.Components()
         self.patterns = []
@@ -1419,6 +1424,15 @@ class ComponentFail(CTSTest):
                 self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
                 self.okerrpatterns.append(self.templates["Pat:ChildExit"])
 
+        if chosen.name == "stonith":
+            # Ignore actions for STONITH resources
+            (rc, lines) = self.rsh(node, "crm_resource -c", None)
+            for line in lines:
+                if re.search("^Resource", line):
+                    r = AuditResource(self.CM, line)
+                    if r.rclass == "stonith":
+                        self.okerrpatterns.append(self.templates["LogActions: Recover.*%s"] % r.id)
+
         # supply a copy so self.patterns doesnt end up empty
         tmpPats = []
         tmpPats.extend(self.patterns)
@@ -2512,6 +2526,7 @@ class RemoteLXC(CTSTest):
         self.startall = SimulStartLite(cm)
         self.num_containers = 2
         self.is_container = 1
+        self.is_docker_unsafe = 1
         self.failed = 0
         self.fail_string = ""
 
@@ -2624,6 +2639,7 @@ class RemoteBaremetal(CTSTest):
     def __init__(self, cm):
         CTSTest.__init__(self,cm)
         self.name = "RemoteBaremetal"
+        self.is_docker_unsafe = 1
         self.start = StartTest(cm)
         self.startall = SimulStartLite(cm)
         self.stop = StopTest(cm)
diff --git a/cts/environment.py b/cts/environment.py
index de1d099..d741452 100644
--- a/cts/environment.py
+++ b/cts/environment.py
@@ -71,6 +71,7 @@ class Environment:
         self["loop-tests"] = 1
         self["scenario"] = "random"
         self["stats"] = 0
+        self["docker"] = 0
 
         self.RandomGen = random.Random()
         self.logger = LogFactory()
@@ -143,7 +144,9 @@ class Environment:
                 # GoodThing(tm).
                 try:
                     n = node.strip()
-                    gethostbyname_ex(n)
+                    if self.data["docker"] == 0:
+                        gethostbyname_ex(n)
+
                     self.Nodes.append(n) 
                 except:
                     self.logger.log(node+" not found in DNS... aborting")
@@ -191,7 +194,10 @@ class Environment:
             return "crm-lha"
 
         elif self.data["Stack"] == "corosync 2.x":
-            return "crm-mcp"
+            if self["docker"]:
+                return "crm-mcp-docker"
+            else:
+                return "crm-mcp"
 
         elif self.data["Stack"] == "corosync (cman)":
             return "crm-cman"
@@ -342,6 +348,10 @@ class Environment:
             elif args[i] == "--qarsh":
                 RemoteFactory().enable_qarsh()
 
+            elif args[i] == "--docker":
+                self["docker"] = 1
+                RemoteFactory().enable_docker()
+
             elif args[i] == "--stonith" or args[i] == "--fencing":
                 skipthis=1
                 if args[i+1] == "1" or args[i+1] == "yes":
@@ -352,6 +362,9 @@ class Environment:
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_xvm"
                     self["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0"
+                elif args[i+1] == "docker":
+                    self["DoStonith"]=1
+                    self["stonith-type"] = "fence_docker_cts"
                 elif args[i+1] == "scsi":
                     self["DoStonith"]=1
                     self["stonith-type"] = "fence_scsi"
@@ -644,6 +657,7 @@ class Environment:
         print "\t [--container-tests]          include pacemaker_remote tests that run in lxc container resources"
         print "\t [--oprofile 'node list']     list of cluster nodes to run oprofile on]"
         print "\t [--qarsh]                    use the QARSH backdoor to access nodes instead of SSH"
+        print "\t [--docker]                   Indicates nodes are docker nodes."
         print "\t [--seed random_seed]"
         print "\t [--set option=value]"
         print "\t "
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
index 6900b67..e11532b 100755
--- a/cts/lxc_autogen.sh.in
+++ b/cts/lxc_autogen.sh.in
@@ -72,6 +72,7 @@ if [ $verify -eq 1 ]; then
 	virsh -c lxc:/// list --all > /dev/null 2>&1
 	if [ $? -ne 0 ]; then
 		echo "Could not connect 'virsh -c lxc:///' check that libvirt lxc driver is installed"
+		# yum install -y libvirt-daemon-driver-lxc libvirt-daemon-lxc libvirt-login-shell
 		exit 1
 	fi
 
diff --git a/cts/patterns.py b/cts/patterns.py
index f651965..8d34e1c 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -364,9 +364,12 @@ class crm_cs_v0(BasePatterns):
         self.components["stonith-ignore"] = [
             "LogActions: Recover Fencing",
             "Updating failcount for Fencing",
+            "error: crm_ipc_read: Connection to stonith-ng failed",
+            "error: mainloop_gio_callback: Connection to stonith-ng.*closed (I/O condition=17)",
+            "crit: tengine_stonith_connection_destroy: Fencing daemon connection failed",
             "error: te_connect_stonith:.*Sign-in failed: triggered a retry",
             "STONITH connection failed, finalizing .* pending operations.",
-            "process_lrm_event:.*Operation Fencing.* Error"
+            "process_lrm_event:.*Operation Fencing.* Error",
         ]
         self.components["stonith-ignore"].extend(self.components["common-ignore"])
 
@@ -409,6 +412,20 @@ class crm_mcp(crm_cs_v0):
 #                "Pat:We_stopped"   : "%s.*Stopped Corosync Cluster Engine",
 #            })
 
+class crm_mcp_docker(crm_mcp):
+    '''
+    The crm version 4 cluster manager class.
+    It implements the things we need to talk to and manipulate
+    crm clusters running on top of native corosync (no plugins)
+    '''
+    def __init__(self, name):
+        crm_mcp.__init__(self, name)
+
+        self.commands.update({
+            "StartCmd"       : "pcmk_start",
+            "StopCmd"        : "pcmk_stop",
+        })
+
 class crm_cman(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
@@ -454,6 +471,8 @@ class PatternSelector:
             crm_cman(name)
         elif name == "crm-mcp":
             crm_mcp(name)
+        elif name == "crm-mcp-docker":
+            crm_mcp_docker(name)
 
     def get_variant(self, variant):
         if patternvariants.has_key(variant):
diff --git a/cts/remote.py b/cts/remote.py
index c8253c3..7920fc9 100644
--- a/cts/remote.py
+++ b/cts/remote.py
@@ -261,6 +261,12 @@ class RemoteFactory:
     def new(self, silent=False):
         return RemoteExec(RemoteFactory.rsh, silent)
 
+    def enable_docker(self):
+        print "Using DOCKER backend for connections to cluster nodes"
+
+        RemoteFactory.rsh.Command = "/usr/libexec/phd/docker/phd_docker_remote_cmd "
+        RemoteFactory.rsh.CpCommand = "/usr/libexec/phd/docker/phd_docker_cp"
+
     def enable_qarsh(self):
         # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/
         print "Using QARSH for connections to cluster nodes"
diff --git a/cts/watcher.py b/cts/watcher.py
index d33e580..5e6ee43 100644
--- a/cts/watcher.py
+++ b/cts/watcher.py
@@ -165,7 +165,11 @@ class FileObj(SearchObj):
             global log_watcher_bin
 
             self.debug("Installing %s on %s" % (log_watcher_bin, host))
-            self.rsh(host, '''echo "%s" > %s''' % (log_watcher, log_watcher_bin), silent=True)
+
+            os.system("cat << END >> %s\n%s\nEND" %(log_watcher_bin, log_watcher))
+            os.system("chmod 755 %s" %(log_watcher_bin))
+
+            self.rsh.cp(log_watcher_bin, "root@%s:%s" % (host, log_watcher_bin))
             has_log_watcher[host] = 1
 
         self.harvest()
@@ -176,7 +180,8 @@ class FileObj(SearchObj):
             if match:
                 last_offset = self.offset
                 self.offset = match.group(1)
-                #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(lines), self.offset))
+                #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(outLines), self.offset))
+                self.debug("Got %d lines, new offset: %s  %s" % (len(outLines), self.offset, repr(self.delegate)))
 
             elif re.search("^CTSwatcher:.*truncated", line):
                 self.log(line)
@@ -199,7 +204,7 @@ class FileObj(SearchObj):
 
         global log_watcher_bin
         return self.rsh.call_async(self.host,
-                "python %s -t %s -p CTSwatcher: -l 200 -f %s -o %s" % (log_watcher_bin, self.name, self.filename, self.offset),
+                                   "python %s -t %s -p CTSwatcher: -l 200 -f %s -o %s -t %s" % (log_watcher_bin, self.name, self.filename, self.offset, self.name),
                 completionDelegate=self)
 
     def setend(self):
@@ -208,7 +213,7 @@ class FileObj(SearchObj):
 
         global log_watcher_bin
         (rc, lines) = self.rsh(self.host,
-                 "python %s -t %s -p CTSwatcher: -l 2 -f %s -o %s" % (log_watcher_bin, self.name, self.filename, "EOF"),
+                               "python %s -t %s -p CTSwatcher: -l 2 -f %s -o %s -t %s" % (log_watcher_bin, self.name, self.filename, "EOF", self.name),
                  None, silent=True)
 
         for line in lines:
@@ -386,7 +391,7 @@ class LogWatcher(RemoteExec):
 
     def async_complete(self, pid, returncode, outLines, errLines):
         # TODO: Probably need a lock for updating self.line_cache
-        self.logger.debug("%s: Got %d lines from %d" % (self.name, len(outLines), pid))
+        self.logger.debug("%s: Got %d lines from %d (total %d)" % (self.name, len(outLines), pid, len(self.line_cache)))
         if len(outLines):
             self.cache_lock.acquire()
             self.line_cache.extend(outLines)
@@ -407,7 +412,7 @@ class LogWatcher(RemoteExec):
         for t in pending:
             t.join(60.0)
             if t.isAlive():
-                self.logger.log("%s: Aborting after 20s waiting for %d logging commands" % (self.name, repr(t)))
+                self.logger.log("%s: Aborting after 20s waiting for %s logging commands" % (self.name, repr(t)))
                 return
 
         #print "Got %d lines" % len(self.line_cache)
@@ -484,9 +489,6 @@ class LogWatcher(RemoteExec):
                 if len(self.line_cache) == 0 and end < time.time():
                     self.debug("Single search terminated: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines))
                     return None
-                elif len(self.line_cache) == 0:
-                    self.debug("Single search timed out: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines))
-                    return None
                 else:
                     self.debug("Waiting: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), len(self.line_cache)))
                     time.sleep(1)
@@ -520,6 +522,7 @@ class LogWatcher(RemoteExec):
                 self.unmatched = self.regexes
                 self.matched = returnresult
                 self.regexes = save_regexes
+                self.end()
                 return None
 
             returnresult.append(oneresult)
diff --git a/extra/resources/remote b/extra/resources/remote
index 9e0482b..9f141a2 100644
--- a/extra/resources/remote
+++ b/extra/resources/remote
@@ -62,11 +62,11 @@ meta_data() {
     </parameter>
   </parameters>
   <actions>
-    <action name="start"   timeout="15" />
-    <action name="stop"    timeout="15" />
-    <action name="monitor"    timeout="15" />
-    <action name="migrate_to"   timeout="15" />
-    <action name="migrate_from" timeout="15" />
+    <action name="start"   timeout="40" />
+    <action name="stop"    timeout="40" />
+    <action name="monitor"    timeout="30" />
+    <action name="migrate_to"   timeout="60" />
+    <action name="migrate_from" timeout="60" />
     <action name="meta-data"  timeout="5" />
   </actions>
 </resource-agent>
diff --git a/fencing/commands.c b/fencing/commands.c
index a4e9f30..577ea95 100644
--- a/fencing/commands.c
+++ b/fencing/commands.c
@@ -1094,7 +1094,10 @@ stonith_device_action(xmlNode * msg, char **output)
         device = g_hash_table_lookup(device_list, id);
     }
 
-    if (device) {
+    if (device && device->api_registered == FALSE) {
+        rc = -ENODEV;
+
+    } else if (device) {
         cmd = create_async_command(msg);
         if (cmd == NULL) {
             free_device(device);
diff --git a/fencing/main.c b/fencing/main.c
index 5ae36cf..b03659e 100644
--- a/fencing/main.c
+++ b/fencing/main.c
@@ -415,7 +415,7 @@ topology_remove_helper(const char *node, int level)
     xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL);
     xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_DEL);
 
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
     crm_xml_add_int(data, XML_ATTR_ID, level);
     crm_xml_add(data, F_STONITH_TARGET, node);
 
diff --git a/include/crm/services.h b/include/crm/services.h
index e8bc172..5310709 100644
--- a/include/crm/services.h
+++ b/include/crm/services.h
@@ -152,6 +152,7 @@ enum nagios_exitcode {
         int status;
         int sequence;
         int expected_rc;
+        int synchronous;
 
         char *stderr_data;
         char *stdout_data;
diff --git a/include/crm_internal.h b/include/crm_internal.h
index ba78da2..3eb88de 100644
--- a/include/crm_internal.h
+++ b/include/crm_internal.h
@@ -220,7 +220,7 @@ gboolean crm_remote_recv(crm_remote_t * remote, int total_timeout /*ms */ , int
 xmlNode *crm_remote_parse_buffer(crm_remote_t * remote);
 int crm_remote_tcp_connect(const char *host, int port);
 int crm_remote_tcp_connect_async(const char *host, int port, int timeout,       /*ms */
-                                 void *userdata, void (*callback) (void *userdata, int sock));
+                                 int *timer_id, void *userdata, void (*callback) (void *userdata, int sock));
 
 #  ifdef HAVE_GNUTLS_GNUTLS_H
 /*!
@@ -276,6 +276,7 @@ int crm_read_pidfile(const char *filename);
 #  define attrd_channel		T_ATTRD
 #  define F_ATTRD_KEY		"attr_key"
 #  define F_ATTRD_ATTRIBUTE	"attr_name"
+#  define F_ATTRD_REGEX 	"attr_regex"
 #  define F_ATTRD_TASK		"task"
 #  define F_ATTRD_VALUE		"attr_value"
 #  define F_ATTRD_SET		"attr_set"
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index 9410506..24700e5 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -389,7 +389,9 @@ crm_find_peer(unsigned int id, const char *uname)
         }
 
     } else if(uname && by_id->uname) {
-        crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id);
+        crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
+        crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
+        crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
 
     } else if(id && by_name->id) {
         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
index c1801a4..f26225f 100644
--- a/lib/common/ipc.c
+++ b/lib/common/ipc.c
@@ -806,7 +806,7 @@ crm_ipc_connect(crm_ipc_t * client)
 
 #ifdef HAVE_IPCS_GET_BUFFER_SIZE
     client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc);
-    if (client->max_buf_size < client->buf_size) {
+    if (client->max_buf_size > client->buf_size) {
         free(client->buffer);
         client->buffer = calloc(1, client->max_buf_size);
         client->buf_size = client->max_buf_size;
diff --git a/lib/common/remote.c b/lib/common/remote.c
index 0a7cd93..e2492b9 100644
--- a/lib/common/remote.c
+++ b/lib/common/remote.c
@@ -737,11 +737,12 @@ check_connect_finished(gpointer userdata)
 static int
 internal_tcp_connect_async(int sock,
                            const struct sockaddr *addr, socklen_t addrlen, int timeout /* ms */ ,
-                           void *userdata, void (*callback) (void *userdata, int sock))
+                           int *timer_id, void *userdata, void (*callback) (void *userdata, int sock))
 {
     int rc = 0;
     int flag = 0;
     int interval = 500;
+    int timer;
     struct tcp_async_cb_data *cb_data = NULL;
 
     if ((flag = fcntl(sock, F_GETFL)) >= 0) {
@@ -782,7 +783,10 @@ internal_tcp_connect_async(int sock,
      * Something about the way mainloop is currently polling prevents this from working at the
      * moment though. */
     crm_trace("fd %d: scheduling to check if connect finished in %dms second", sock, interval);
-    g_timeout_add(interval, check_connect_finished, cb_data);
+    timer = g_timeout_add(interval, check_connect_finished, cb_data);
+    if (timer_id) {
+        *timer_id = timer;
+    }
 
     return 0;
 }
@@ -809,10 +813,11 @@ internal_tcp_connect(int sock, const struct sockaddr *addr, socklen_t addrlen)
  * \internal
  * \brief tcp connection to server at specified port
  * \retval negative, failed to connect.
+ * \retval positive, sock fd
  */
 int
-crm_remote_tcp_connect_async(const char *host, int port, int timeout,   /*ms */
-                             void *userdata, void (*callback) (void *userdata, int sock))
+crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */
+                             int *timer_id, void *userdata, void (*callback) (void *userdata, int sock))
 {
     char buffer[256];
     struct addrinfo *res = NULL;
@@ -877,8 +882,7 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout,   /*ms */
 
         if (callback) {
             if (internal_tcp_connect_async
-                (sock, rp->ai_addr, rp->ai_addrlen, timeout, userdata, callback) == 0) {
-                sock = 0;
+                (sock, rp->ai_addr, rp->ai_addrlen, timeout, timer_id, userdata, callback) == 0) {
                 goto async_cleanup; /* Success for now, we'll hear back later in the callback */
             }
 
@@ -903,5 +907,5 @@ async_cleanup:
 int
 crm_remote_tcp_connect(const char *host, int port)
 {
-    return crm_remote_tcp_connect_async(host, port, -1, NULL, NULL);
+    return crm_remote_tcp_connect_async(host, port, -1, NULL, NULL, NULL);
 }
diff --git a/lib/common/utils.c b/lib/common/utils.c
index e559c51..dc54e6d 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -2005,6 +2005,9 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha
     }
 
     switch (command) {
+        case 'u':
+            crm_xml_add(update, F_ATTRD_TASK, "update");
+            crm_xml_add(update, F_ATTRD_REGEX, name);
         case 'D':
         case 'U':
         case 'v':
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 2837682..06b9492 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -192,7 +192,7 @@ create_device_registration_xml(const char *id, const char *namespace, const char
 #endif
 
     crm_xml_add(data, XML_ATTR_ID, id);
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
     crm_xml_add(data, "agent", agent);
     crm_xml_add(data, "namespace", namespace);
     if (rsc_provides) {
@@ -229,7 +229,7 @@ stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
     xmlNode *data = NULL;
 
     data = create_xml_node(NULL, F_STONITH_DEVICE);
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
     crm_xml_add(data, XML_ATTR_ID, name);
     rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
     free_xml(data);
@@ -244,7 +244,7 @@ stonith_api_remove_level(stonith_t * st, int options, const char *node, int leve
     xmlNode *data = NULL;
 
     data = create_xml_node(NULL, F_STONITH_LEVEL);
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
     crm_xml_add(data, F_STONITH_TARGET, node);
     crm_xml_add_int(data, XML_ATTR_ID, level);
     rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
@@ -260,7 +260,7 @@ create_level_registration_xml(const char *node, int level, stonith_key_value_t *
 
     crm_xml_add_int(data, XML_ATTR_ID, level);
     crm_xml_add(data, F_STONITH_TARGET, node);
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
 
     for (; device_list; device_list = device_list->next) {
         xmlNode *dev = create_xml_node(data, F_STONITH_DEVICE);
@@ -1255,7 +1255,7 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target,
     CRM_CHECK(devices != NULL, return -EINVAL);
 
     data = create_xml_node(NULL, F_STONITH_DEVICE);
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
     crm_xml_add(data, F_STONITH_TARGET, target);
     crm_xml_add(data, F_STONITH_ACTION, "off");
     rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
@@ -1296,7 +1296,7 @@ stonith_api_call(stonith_t * stonith,
     xmlNode *data = NULL;
 
     data = create_xml_node(NULL, F_STONITH_DEVICE);
-    crm_xml_add(data, "origin", __FUNCTION__);
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
     crm_xml_add(data, F_STONITH_DEVICE, id);
     crm_xml_add(data, F_STONITH_ACTION, action);
     crm_xml_add(data, F_STONITH_TARGET, victim);
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index 3496098..b8c5d23 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -89,6 +89,9 @@ typedef struct lrmd_private_s {
     int port;
     gnutls_psk_client_credentials_t psk_cred_c;
 
+    /* while the async connection is occurring, this is the id
+     * of the connection timeout timer. */
+    int async_timer;
     int sock;
     /* since tls requires a round trip across the network for a
      * request/reply, there are times where we just want to be able
@@ -1101,6 +1104,8 @@ lrmd_tcp_connect_cb(void *userdata, int sock)
     int rc = sock;
     gnutls_datum_t psk_key = { NULL, 0 };
 
+    native->async_timer = 0;
+
     if (rc < 0) {
         lrmd_tls_connection_destroy(lrmd);
         crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port);
@@ -1152,14 +1157,23 @@ lrmd_tcp_connect_cb(void *userdata, int sock)
 static int
 lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
 {
-    int rc = 0;
+    int rc = -1;
+    int sock = 0;
+    int timer_id = 0;
+
     lrmd_private_t *native = lrmd->private;
 
     lrmd_gnutls_global_init();
 
-    rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd,
+    sock = crm_remote_tcp_connect_async(native->server, native->port, timeout, &timer_id, lrmd,
                                       lrmd_tcp_connect_cb);
 
+    if (sock != -1) {
+        native->sock = sock;
+        rc = 0;
+        native->async_timer = timer_id;
+    }
+
     return rc;
 }
 
@@ -1319,6 +1333,11 @@ lrmd_tls_disconnect(lrmd_t * lrmd)
         native->remote->tls_session = 0;
     }
 
+    if (native->async_timer) {
+        g_source_remove(native->async_timer);
+        native->async_timer = 0;
+    }
+
     if (native->source != NULL) {
         /* Attached to mainloop */
         mainloop_del_ipc_client(native->source);
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index b699201..7127c12 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1756,6 +1756,7 @@ process_rsc_state(resource_t * rsc, node_t * node,
     if (rsc->role > RSC_ROLE_STOPPED
         && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) {
 
+        char *reason = NULL;
         gboolean should_fence = FALSE;
 
         /* if this is a remote_node living in a container, fence the container
@@ -1768,14 +1769,25 @@ process_rsc_state(resource_t * rsc, node_t * node,
 
             should_fence = TRUE;
         } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
+            if (is_baremetal_remote_node(node) && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
+                /* setting unseen = true means that fencing of the remote node will
+                 * only occur if the connection resource is not going to start somewhere.
+                 * This allows connection resources on a failed cluster-node to move to
+                 * another node without requiring the baremetal remote nodes to be fenced
+                 * as well. */
+                node->details->unseen = TRUE;
+                reason = g_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id);
+            }
             should_fence = TRUE;
         }
 
         if (should_fence) {
-            char *reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
+            if (reason == NULL) {
+               reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
+            }
             pe_fence_node(data_set, node, reason);
-            g_free(reason);
         }
+        g_free(reason);
     }
 
     if (node->details->unclean) {
@@ -1840,6 +1852,17 @@ process_rsc_state(resource_t * rsc, node_t * node,
             break;
     }
 
+    /* ensure a remote-node connection failure forces an unclean remote-node
+     * to be fenced. By setting unseen = FALSE, the remote-node failure will
+     * result in a fencing operation regardless of whether we're going to attempt to
+     * reconnect to the remote-node in this transition or not. */
+    if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
+        node_t *tmpnode = pe_find_node(data_set->nodes, rsc->id);
+        if (tmpnode->details->unclean) {
+            tmpnode->details->unseen = FALSE;
+        }
+    }
+
     if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
         if (is_set(rsc->flags, pe_rsc_orphan)) {
             if (is_set(rsc->flags, pe_rsc_managed)) {
@@ -2160,7 +2183,7 @@ unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * d
     for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) {
         remote = (resource_t *) gIter->data;
         if (remote->role != RSC_ROLE_STARTED) {
-            crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.");
+            crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id);
             set_bit(remote->container->flags, pe_rsc_failed);
         }
     }
diff --git a/lib/services/dbus.c b/lib/services/dbus.c
index 8b8aee1..587589c 100644
--- a/lib/services/dbus.c
+++ b/lib/services/dbus.c
@@ -6,6 +6,14 @@
 
 #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"
 
+struct db_getall_data
+{
+        char *name;
+        char *target;
+        char *object;
+        void *userdata;
+        void (*callback)(const char *name, const char *value, void *userdata);
+};
 
 static bool pcmk_dbus_error_check(DBusError *err, const char *prefix, const char *function, int line) 
 {
@@ -107,8 +115,9 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D
     method = dbus_message_get_member (msg);
 
     // send message and get a handle for a reply
-    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1)) { // -1 is default timeout
+    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1/* aka. DBUS_TIMEOUT_USE_DEFAULT */)) {
         if(error) {
+            dbus_error_init(error);
             error->message = "Call to dbus_connection_send_with_reply() failed";
             error->name = "org.clusterlabs.pacemaker.SendFailed";
         }
@@ -126,13 +135,7 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D
         reply = dbus_pending_call_steal_reply(pending);
     }
 
-    if(pcmk_dbus_find_error(method, pending, reply, error)) {
-        crm_trace("Was error: '%s' '%s'", error->name, error->message);
-        if(reply) {
-            dbus_message_unref(reply);
-            reply = NULL;
-        }
-    }
+    pcmk_dbus_find_error(method, pending, reply, error);
 
     if(pending) {
         /* free the pending message handle */
@@ -156,7 +159,7 @@ bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
     method = dbus_message_get_member (msg);
 
     // send message and get a handle for a reply
-    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1)) { // -1 is default timeout
+    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1/* aka. DBUS_TIMEOUT_USE_DEFAULT */)) { // -1 is default timeout
         crm_err("Send with reply failed for %s", method);
         return FALSE;
 
@@ -205,65 +208,38 @@ bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected
 
         dbus_message_iter_init(msg, &args);
         do_crm_log_alias(LOG_ERR, __FILE__, function, line,
-                         "Unexepcted DBus type, expected %c instead of %c in '%s'",
-                         expected, dtype, dbus_message_iter_get_signature(&args));
+                         "Unexepcted DBus type, expected %c in '%s' instead of %c",
+                         expected, dbus_message_iter_get_signature(&args), dtype);
         return FALSE;
     }
 
     return TRUE;
 }
 
-char *
-pcmk_dbus_get_property(
-    DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name)
+static char *
+pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data)
 {
-    DBusMessage *msg;
-    DBusMessageIter args;
-    DBusMessageIter dict;
-    DBusMessage *reply = NULL;
-    /* DBusBasicValue value; */
-    const char *method = "GetAll";
-    char *output = NULL;
     DBusError error;
+    char *output = NULL;
+    DBusMessageIter dict;
+    DBusMessageIter args;
 
-        /* desc = systemd_unit_property(path, BUS_NAME ".Unit", "Description"); */
-
-    dbus_error_init(&error);
-    crm_info("Calling: %s on %s", method, target);
-    msg = dbus_message_new_method_call(target, // target for the method call
-                                       obj, // object to call on
-                                       BUS_PROPERTY_IFACE, // interface to call on
-                                       method); // method name
-
-    if (NULL == msg) {
-        crm_err("Call to %s failed: No message", method);
-        return NULL;
-    }
-
-    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_INVALID));
-
-    reply = pcmk_dbus_send_recv(msg, connection, &error);
-    dbus_message_unref(msg);
-
-    if(error.name) {
-        crm_err("Call to %s for %s failed: No reply", method, iface);
-        return NULL;
-
-    } else if (!dbus_message_iter_init(reply, &args)) {
-        crm_err("Cannot get properties for %s from %s", obj, iface);
-        return NULL;
+    if(pcmk_dbus_find_error("GetAll", (void*)&error, reply, &error)) {
+        crm_err("Cannot get properties from %s for %s", data->target, data->object);
+        goto cleanup;
     }
 
+    dbus_message_iter_init(reply, &args);
     if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) {
-        crm_err("Call to %s failed: Message has invalid arguments", method);
-        dbus_message_unref(reply);
-        return NULL;
+        crm_err("Invalid reply from %s for %s", data->target, data->object);
+        goto cleanup;
     }
 
     dbus_message_iter_recurse(&args, &dict);
     while (dbus_message_iter_get_arg_type (&dict) != DBUS_TYPE_INVALID) {
         DBusMessageIter sv;
         DBusMessageIter v;
+        DBusBasicValue name;
         DBusBasicValue value;
 
         if(!pcmk_dbus_type_check(reply, &dict, DBUS_TYPE_DICT_ENTRY, __FUNCTION__, __LINE__)) {
@@ -277,10 +253,9 @@ pcmk_dbus_get_property(
 
             switch(dtype) {
                 case DBUS_TYPE_STRING:
-                    dbus_message_iter_get_basic(&sv, &value);
+                    dbus_message_iter_get_basic(&sv, &name);
 
-                    crm_trace("Got: %s", value.str);
-                    if(strcmp(value.str, name) != 0) {
+                    if(data->name && strcmp(name.str, data->name) != 0) {
                         dbus_message_iter_next (&sv); /* Skip the value */
                     }
                     break;
@@ -289,8 +264,17 @@ pcmk_dbus_get_property(
                     if(pcmk_dbus_type_check(reply, &v, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) {
                         dbus_message_iter_get_basic(&v, &value);
 
-                        crm_trace("Result: %s", value.str);
-                        output = strdup(value.str);
+                        crm_trace("Property %s[%s] is '%s'", data->object, name.str, value.str);
+                        if(data->callback) {
+                            data->callback(name.str, value.str, data->userdata);
+
+                        } else {
+                            output = strdup(value.str);
+                        }
+
+                        if(data->name) {
+                            goto cleanup;
+                        }
                     }
                     break;
                 default:
@@ -302,8 +286,82 @@ pcmk_dbus_get_property(
         dbus_message_iter_next (&dict);
     }
 
+  cleanup:
+    free(data->target);
+    free(data->object);
+    free(data->name);
+    free(data);
+
+    return output;
+}
+
+static void
+pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data)
+{
+    DBusMessage *reply = NULL;
+
+    if(pending) {
+        reply = dbus_pending_call_steal_reply(pending);
+    }
+
+    pcmk_dbus_lookup_result(reply, user_data);
+
+    if(reply) {
+        dbus_message_unref(reply);
+    }
+}
+
+char *
+pcmk_dbus_get_property(
+    DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name,
+    void (*callback)(const char *name, const char *value, void *userdata), void *userdata)
+{
+    DBusMessage *msg;
+    const char *method = "GetAll";
+    char *output = NULL;
+
+    struct db_getall_data *query_data = NULL;
+
+    /* char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState"); */
+
+    crm_debug("Calling: %s on %s", method, target);
+    msg = dbus_message_new_method_call(target, // target for the method call
+                                       obj, // object to call on
+                                       BUS_PROPERTY_IFACE, // interface to call on
+                                       method); // method name
+
+    if (NULL == msg) {
+        crm_err("Call to %s failed: No message", method);
+        return NULL;
+    }
+
+    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_INVALID));
+
+    query_data = malloc(sizeof(struct db_getall_data));
+    query_data->target = strdup(target);
+    query_data->object = strdup(obj);
+    query_data->callback = callback;
+    query_data->userdata = userdata;
+    query_data->name = NULL;
+
+    if(name) {
+        query_data->name = strdup(name);
+    }
+
+    if(query_data->callback) {
+        pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data);
+
+    } else {
+        DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL);
+
+        output = pcmk_dbus_lookup_result(reply, query_data);
+        if(reply) {
+            dbus_message_unref(reply);
+        }
+    }
+
+    dbus_message_unref(msg);
 
-    crm_trace("Property %s[%s] is '%s'", obj, name, output);
     return output;
 }
 
@@ -354,6 +412,14 @@ pcmk_dbus_watch_add(DBusWatch *watch, void *data){
 }
 
 static void
+pcmk_dbus_watch_toggle(DBusWatch *watch, void *data)
+{
+    mainloop_io_t *client = dbus_watch_get_data(watch);
+    crm_notice("DBus client %p is now %s", client, dbus_watch_get_enabled(watch)?"enabled":"disabled");
+}
+
+
+static void
 pcmk_dbus_watch_remove(DBusWatch *watch, void *data){
     mainloop_io_t *client = dbus_watch_get_data(watch);
 
@@ -404,7 +470,7 @@ pcmk_dbus_timeout_toggle(DBusTimeout *timeout, void *data){
 void pcmk_dbus_connection_setup_with_select(DBusConnection *c){
 	dbus_connection_set_timeout_functions(
             c, pcmk_dbus_timeout_add, pcmk_dbus_timeout_remove, pcmk_dbus_timeout_toggle, NULL, NULL);
-	dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add, pcmk_dbus_watch_remove, NULL, NULL, NULL);
+	dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add, pcmk_dbus_watch_remove, pcmk_dbus_watch_toggle, NULL, NULL);
 	dbus_connection_set_dispatch_status_function(c, pcmk_dbus_connection_dispatch, NULL, NULL);
 
 	pcmk_dbus_connection_dispatch(c, dbus_connection_get_dispatch_status(c), NULL);
diff --git a/lib/services/pcmk-dbus.h b/lib/services/pcmk-dbus.h
index 3b7a598..ed80c5f 100644
--- a/lib/services/pcmk-dbus.h
+++ b/lib/services/pcmk-dbus.h
@@ -6,7 +6,9 @@ bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
                     void(*done)(DBusPendingCall *pending, void *user_data), void *user_data);
 DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error);
 bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line);
-char *pcmk_dbus_get_property(DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name);
+char *pcmk_dbus_get_property(
+    DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name,
+    void (*callback)(const char *name, const char *value, void *userdata), void *userdata);
 
 bool pcmk_dbus_find_error(const char *method, DBusPendingCall* pending, DBusMessage *reply, DBusError *error);
 
diff --git a/lib/services/services.c b/lib/services/services.c
index 7b32405..8590b56 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -473,6 +473,7 @@ handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_actio
 gboolean
 services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
 {
+    op->synchronous = false;
     if (action_callback) {
         op->opaque->callback = action_callback;
     }
@@ -491,7 +492,7 @@ services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *
     }
     if (op->standard && strcasecmp(op->standard, "systemd") == 0) {
 #if SUPPORT_SYSTEMD
-        return systemd_unit_exec(op, FALSE);
+        return systemd_unit_exec(op);
 #endif
     }
     return services_os_action_execute(op, FALSE);
@@ -502,6 +503,7 @@ services_action_sync(svc_action_t * op)
 {
     gboolean rc = TRUE;
 
+    op->synchronous = true;
     if (op == NULL) {
         crm_trace("No operation to execute");
         return FALSE;
@@ -512,7 +514,7 @@ services_action_sync(svc_action_t * op)
 #endif
     } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) {
 #if SUPPORT_SYSTEMD
-        rc = systemd_unit_exec(op, TRUE);
+        rc = systemd_unit_exec(op);
 #endif
     } else {
         rc = services_os_action_execute(op, TRUE);
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index e81d178..c967430 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -35,6 +35,9 @@
 /*
    /usr/share/dbus-1/interfaces/org.freedesktop.systemd1.Manager.xml
 */
+gboolean
+systemd_unit_exec_with_unit(svc_action_t * op, const char *unit);
+
 
 struct unit_info {
     const char *id;
@@ -49,6 +52,15 @@ struct unit_info {
     const char *job_path;
 };
 
+struct pcmk_dbus_data 
+{
+        char *name;
+        char *unit;
+        DBusError error;
+        svc_action_t *op;
+        void (*callback)(DBusMessage *reply, svc_action_t *op);
+};
+
 static DBusMessage *systemd_new_method(const char *iface, const char *method)
 {
     crm_trace("Calling: %s on %s", method, iface);
@@ -101,6 +113,7 @@ systemd_service_name(const char *name)
 static bool
 systemd_daemon_reload(void)
 {
+    /* TODO: Make this asynchronous */
     const char *method = "Reload";
     DBusMessage *reply = NULL;
     DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);
@@ -114,21 +127,55 @@ systemd_daemon_reload(void)
     return TRUE;
 }
 
-static gboolean
-systemd_unit_by_name(const gchar * arg_name, gchar ** out_unit)
+static const char *
+systemd_loadunit_result(DBusMessage *reply, svc_action_t * op)
+{
+    const char *path = NULL;
+
+    if(pcmk_dbus_find_error("LoadUnit", (void*)&path, reply, NULL)) {
+        if(op) {
+            crm_warn("No unit found for %s", op->rsc);
+        }
+
+    } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
+        dbus_message_get_args (reply, NULL,
+                               DBUS_TYPE_OBJECT_PATH, &path,
+                               DBUS_TYPE_INVALID);
+    }
+
+    if(op) {
+        systemd_unit_exec_with_unit(op, path);
+    }
+
+    return path;
+}
+
+
+static void
+systemd_loadunit_cb(DBusPendingCall *pending, void *user_data)
+{
+    DBusMessage *reply = NULL;
+
+    if(pending) {
+        reply = dbus_pending_call_steal_reply(pending);
+    }
+
+    systemd_loadunit_result(reply, user_data);
+
+    if(reply) {
+        dbus_message_unref(reply);
+    }
+}
+
+static char *
+systemd_unit_by_name(const gchar * arg_name, svc_action_t *op)
 {
     DBusMessage *msg;
     DBusMessage *reply = NULL;
-    const char *method = "GetUnit";
     char *name = NULL;
-    DBusError error;
 
 /*
-  <method name="GetUnit">
-   <arg name="name" type="s" direction="in"/>
-   <arg name="unit" type="o" direction="out"/>
-  </method>
-
+  Equivalent to GetUnit if its already loaded
   <method name="LoadUnit">
    <arg name="name" type="s" direction="in"/>
    <arg name="unit" type="o" direction="out"/>
@@ -139,51 +186,34 @@ systemd_unit_by_name(const gchar * arg_name, gchar ** out_unit)
         return FALSE;
     }
 
-    name = systemd_service_name(arg_name);
+    msg = systemd_new_method(BUS_NAME".Manager", "LoadUnit");
+    CRM_ASSERT(msg != NULL);
 
-    while(TRUE) {
-        msg = systemd_new_method(BUS_NAME".Manager", method);
-        CRM_ASSERT(msg != NULL);
+    name = systemd_service_name(arg_name);
+    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
+    free(name);
 
-        CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
+    if(op == NULL || op->synchronous) {
+        const char *unit = NULL;
+        char *munit = NULL;
+        DBusError error;
 
         dbus_error_init(&error);
         reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
         dbus_message_unref(msg);
 
-        if(error.name) {
-            crm_info("Call to %s failed: %s", method, error.name);
-
-        } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
-            if(out_unit) {
-                char *path = NULL;
-
-                dbus_message_get_args (reply, NULL,
-                                       DBUS_TYPE_OBJECT_PATH, &path,
-                                       DBUS_TYPE_INVALID);
-
-                *out_unit = strdup(path);
-            }
-            dbus_message_unref(reply);
-            free(name);
-            return TRUE;
+        unit = systemd_loadunit_result(reply, op);
+        if(unit) {
+            munit = strdup(unit);
         }
-
-        if(strcmp(method, "LoadUnit") != 0) {
-            method = "LoadUnit";
-            crm_debug("Cannot find %s, reloading the systemd manager configuration", name);
-            systemd_daemon_reload();
-            if(reply) {
-                dbus_message_unref(reply);
-                reply = NULL;
-            }
-
-        } else {
-            free(name);
-            return FALSE;
+        if(reply) {
+            dbus_message_unref(reply);
         }
+        return munit;
     }
-    return FALSE;
+
+    pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op);
+    return NULL;
 }
 
 GList *
@@ -220,6 +250,10 @@ systemd_unit_listall(void)
         crm_err("Call to %s failed: %s", method, error.name);
         return NULL;
 
+    } else if (reply == NULL) {
+        crm_err("Call to %s failed: Message has no reply", method);
+        return NULL;
+
     } else if (!dbus_message_iter_init(reply, &args)) {
         crm_err("Call to %s failed: Message has no arguments", method);
         dbus_message_unref(reply);
@@ -269,21 +303,27 @@ systemd_unit_listall(void)
 gboolean
 systemd_unit_exists(const char *name)
 {
-    return systemd_unit_by_name(name, NULL);
+    /* Note: Makes a blocking dbus calls
+     * Used by resources_find_service_class() when resource class=service
+     */
+    char *unit = systemd_unit_by_name(name, NULL);
+    gboolean found = (unit != NULL);
+    free(unit);
+    return found;
 }
 
 static char *
 systemd_unit_metadata(const char *name)
 {
-    char *path = NULL;
     char *meta = NULL;
     char *desc = NULL;
+    char *path = systemd_unit_by_name(name, NULL);
 
-    if (systemd_unit_by_name(name, &path)) {
-        CRM_ASSERT(path);
-        desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description");
+    if (path) {
+        /* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */
+        desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description", NULL, NULL);
     } else {
-        desc = g_strdup_printf("systemd unit file for %s", name);
+        desc = g_strdup_printf("Systemd unit file for %s", name);
     }
 
     meta = g_strdup_printf("<?xml version=\"1.0\"?>\n"
@@ -335,24 +375,15 @@ systemd_mask_error(svc_action_t *op, const char *error)
 }
 
 static void
-systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
+systemd_exec_result(DBusMessage *reply, svc_action_t *op)
 {
     DBusError error;
-    DBusMessage *reply = NULL;
-    svc_action_t *op = user_data;
 
-    dbus_error_init(&error);
-    if(pending) {
-        reply = dbus_pending_call_steal_reply(pending);
-    }
-    if(reply == NULL) {
-        crm_err("No reply for %s action on %s", op->action, op->rsc);
-
-    } else if(pcmk_dbus_find_error(op->action, pending, reply, &error)) {
+    if(pcmk_dbus_find_error(op->action, (void*)&error, reply, &error)) {
 
         /* ignore "already started" or "not running" errors */
         if (!systemd_mask_error(op, error.name)) {
-            crm_err("%s for %s: %s", op->action, op->rsc, error.message);
+            crm_err("Could not issue %s for %s: %s (%s)", op->action, op->rsc, error.message);
         }
 
     } else {
@@ -372,6 +403,21 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
     }
 
     operation_finalize(op);
+}
+
+static void
+systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
+{
+    DBusError error;
+    DBusMessage *reply = NULL;
+    svc_action_t *op = user_data;
+
+    dbus_error_init(&error);
+    if(pending) {
+        reply = dbus_pending_call_steal_reply(pending);
+    }
+
+    systemd_exec_result(reply, op);
 
     if(pending) {
         dbus_pending_call_unref(pending);
@@ -383,61 +429,56 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
 
 #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/"
 
+static void
+systemd_unit_check(const char *name, const char *state, void *userdata)
+{
+    svc_action_t * op = userdata;
+    
+    CRM_ASSERT(state != NULL);
+
+    if (g_strcmp0(state, "active") == 0) {
+        op->rc = PCMK_OCF_OK;
+    } else if (g_strcmp0(state, "activating") == 0) {
+        op->rc = PCMK_OCF_PENDING;
+    } else {
+        op->rc = PCMK_OCF_NOT_RUNNING;
+    }
+
+    if (op->synchronous == FALSE) {
+        operation_finalize(op);
+    }
+}
+
 gboolean
-systemd_unit_exec(svc_action_t * op, gboolean synchronous)
+systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
 {
-    DBusError error;
-    char *unit = NULL;
-    const char *replace_s = "replace";
-    gboolean pass = FALSE;
     const char *method = op->action;
-    char *name = systemd_service_name(op->agent);
     DBusMessage *msg = NULL;
     DBusMessage *reply = NULL;
 
-    dbus_error_init(&error);
-    op->rc = PCMK_OCF_UNKNOWN_ERROR;
-    CRM_ASSERT(systemd_init());
-
-    crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
-              synchronous ? "" : "a", op->action, op->agent, op->rsc);
-
-    if (safe_str_eq(op->action, "meta-data")) {
-        op->stdout_data = systemd_unit_metadata(op->agent);
-        op->rc = PCMK_OCF_OK;
-        goto cleanup;
-    }
+    /* unit may be NULL when LoadUnit found nothing; handled below */
 
-    pass = systemd_unit_by_name(op->agent, &unit);
-    if (pass == FALSE) {
+    if (unit == NULL) {
         crm_debug("Could not obtain unit named '%s'", op->agent);
-#if 0
-        if (error && strstr(error->message, "systemd1.NoSuchUnit")) {
-            op->rc = PCMK_OCF_NOT_INSTALLED;
-            op->status = PCMK_LRM_OP_NOT_INSTALLED;
-        }
-#endif
+        op->rc = PCMK_OCF_NOT_INSTALLED;
+        op->status = PCMK_LRM_OP_NOT_INSTALLED;
         goto cleanup;
     }
 
     if (safe_str_eq(op->action, "monitor") || safe_str_eq(method, "status")) {
-        char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState");
-
-        if (g_strcmp0(state, "active") == 0) {
-            op->rc = PCMK_OCF_OK;
-        } else if (g_strcmp0(state, "activating") == 0) {
-            op->rc = PCMK_OCF_PENDING;
-        } else {
-            op->rc = PCMK_OCF_NOT_RUNNING;
+        char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState",
+                                             op->synchronous?NULL:systemd_unit_check, op);
+        if (op->synchronous) {
+            systemd_unit_check("ActiveState", state, op);
+            free(state);
+            return op->rc == PCMK_OCF_OK;
         }
-
-        free(state);
-        goto cleanup;
+        return TRUE;
 
     } else if (g_strcmp0(method, "start") == 0) {
         FILE *file_strm = NULL;
         char *override_dir = g_strdup_printf("%s/%s", SYSTEMD_OVERRIDE_ROOT, unit);
-        char *override_file = g_strdup_printf("%s/50-pacemaker.conf", override_dir);
+        char *override_file = g_strdup_printf("%s/%s/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, unit);
 
         method = "StartUnit";
         crm_build_path(override_dir, 0755);
@@ -446,11 +487,11 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
         if (file_strm != NULL) {
             int rc = fprintf(file_strm, "[Service]\nRestart=no");
             if (rc < 0) {
-                crm_perror(LOG_ERR, "Cannot write to systemd override file %s: %s (%d)", override_file, pcmk_strerror(errno), errno);
+                crm_perror(LOG_ERR, "Cannot write to systemd override file %s", override_file);
             }
 
         } else {
-            crm_err("Cannot open systemd override file %s for writing: %s (%d)", override_file, pcmk_strerror(errno), errno);
+            crm_err("Cannot open systemd override file %s for writing", override_file);
         }
 
         if (file_strm != NULL) {
@@ -471,6 +512,7 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
 
     } else if (g_strcmp0(method, "restart") == 0) {
         method = "RestartUnit";
+
     } else {
         op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
         goto cleanup;
@@ -482,54 +524,66 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
     CRM_ASSERT(msg != NULL);
 
     /* (ss) */
-    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
-    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
+    {
+        const char *replace_s = "replace";
+        char *name = systemd_service_name(op->agent);
+
+        CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
+        CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
 
-    if (synchronous == FALSE) {
-        free(unit);
         free(name);
-        return pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
     }
 
-    dbus_error_init(&error);
-    reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
-
-    if(error.name) {
-        /* ignore "already started" or "not running" errors */
-        if(!systemd_mask_error(op, error.name)) {
-            crm_err("Could not issue %s for %s: %s (%s)", method, op->rsc, error.name, unit);
-        }
-        goto cleanup;
-
-    } else if(!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
-        crm_warn("Call to %s passed but return type was unexpected", op->action);
-        op->rc = PCMK_OCF_OK;
+    if (op->synchronous == FALSE) {
+        return pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
 
     } else {
-        const char *path = NULL;
+        DBusError error; dbus_error_init(&error);
 
-        dbus_message_get_args (reply, NULL,
-                               DBUS_TYPE_OBJECT_PATH, &path,
-                               DBUS_TYPE_INVALID);
-        crm_info("Call to %s passed: %s", op->action, path);
-        op->rc = PCMK_OCF_OK;
+        reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
+        systemd_exec_result(reply, op);
+        if(reply) {
+            dbus_message_unref(reply);
+        }
     }
 
-  cleanup:
-    free(unit);
-    free(name);
-
     if(msg) {
         dbus_message_unref(msg);
     }
 
-    if(reply) {
-        dbus_message_unref(reply);
+  cleanup:
+    if (op->synchronous == FALSE) {
+        operation_finalize(op);
+        return TRUE;
     }
 
-    if (synchronous == FALSE) {
-        operation_finalize(op);
+    return op->rc == PCMK_OCF_OK;
+}
+
+gboolean
+systemd_unit_exec(svc_action_t * op)
+{
+    CRM_ASSERT(op);
+    CRM_ASSERT(systemd_init());
+    op->rc = PCMK_OCF_UNKNOWN_ERROR;
+    crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
+              op->synchronous ? "" : "a", op->action, op->agent, op->rsc);
+
+    if (safe_str_eq(op->action, "meta-data")) {
+        /* TODO: See if we can teach the lrmd not to make these calls synchronously */
+        op->stdout_data = systemd_unit_metadata(op->agent);
+        op->rc = PCMK_OCF_OK;
+
+        if (op->synchronous == FALSE) {
+            operation_finalize(op);
+        }
         return TRUE;
     }
+
+    systemd_unit_by_name(op->agent, op);
+    if (op->synchronous == FALSE) {
+        return TRUE;
+    }
+
     return op->rc == PCMK_OCF_OK;
 }
diff --git a/lib/services/systemd.h b/lib/services/systemd.h
index 6e1b80b..c86bafe 100644
--- a/lib/services/systemd.h
+++ b/lib/services/systemd.h
@@ -17,7 +17,7 @@
  */
 
 G_GNUC_INTERNAL GList *systemd_unit_listall(void);
-G_GNUC_INTERNAL int systemd_unit_exec(svc_action_t * op, gboolean synchronous);
+G_GNUC_INTERNAL int systemd_unit_exec(svc_action_t * op);
 G_GNUC_INTERNAL gboolean systemd_unit_exists(const gchar * name);
 G_GNUC_INTERNAL gboolean systemd_unit_running(const gchar * name);
 G_GNUC_INTERNAL void systemd_cleanup(void);
diff --git a/lib/services/upstart.c b/lib/services/upstart.c
index f47e8ff..4c7211d 100644
--- a/lib/services/upstart.c
+++ b/lib/services/upstart.c
@@ -275,6 +275,10 @@ get_first_instance(const gchar * job)
         crm_err("Call to %s failed: %s", method, error.name);
         goto done;
 
+    } else if(reply == NULL) {
+        crm_err("Call to %s failed: no reply", method);
+        goto done;
+
     } else if (!dbus_message_iter_init(reply, &args)) {
         crm_err("Call to %s failed: Message has no arguments", method);
         goto done;
@@ -304,31 +308,22 @@ get_first_instance(const gchar * job)
     return instance;
 }
 
-gboolean
-upstart_job_running(const gchar * name)
+static void
+upstart_job_check(const char *name, const char *state, void *userdata)
 {
-    bool running = FALSE;
-    char *job = NULL;
-
-    if(upstart_job_by_name(name, &job)) {
-        char *path = get_first_instance(job);
+    svc_action_t * op = userdata;
 
-        if (path) {
-            char *state = pcmk_dbus_get_property(
-                upstart_proxy, BUS_NAME, path, UPSTART_06_API ".Instance", "state");
-
-            crm_info("State of %s: %s", name, state);
-            if (state) {
-                running = !g_strcmp0(state, "running");
-            }
-            free(state);
-        }
-        free(path);
+    if (state && g_strcmp0(state, "running") == 0) {
+        op->rc = PCMK_OCF_OK;
+    /* } else if (g_strcmp0(state, "activating") == 0) { */
+    /*     op->rc = PCMK_OCF_PENDING; */
+    } else {
+        op->rc = PCMK_OCF_NOT_RUNNING;
     }
 
-    free(job);
-    crm_info("%s is%s running", name, running ? "" : " not");
-    return running;
+    if (op->synchronous == FALSE) {
+        operation_finalize(op);
+    }
 }
 
 static char *
@@ -465,10 +460,24 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
     }
 
     if (safe_str_eq(op->action, "monitor") || safe_str_eq(action, "status")) {
-        if (upstart_job_running(op->agent)) {
-            op->rc = PCMK_OCF_OK;
-        } else {
-            op->rc = PCMK_OCF_NOT_RUNNING;
+
+        char *path = get_first_instance(job);
+
+        op->rc = PCMK_OCF_NOT_RUNNING;
+        if(path) {
+            char *state = pcmk_dbus_get_property(
+                upstart_proxy, BUS_NAME, path, UPSTART_06_API ".Instance", "state",
+                op->synchronous?NULL:upstart_job_check, op);
+
+            free(job);
+            free(path);
+
+            if(op->synchronous) {
+                upstart_job_check("state", state, op);
+                free(state);
+                return op->rc == PCMK_OCF_OK;
+            }
+            return TRUE;
         }
         goto cleanup;
 
@@ -503,7 +512,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
 
     CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_BOOLEAN, &arg_wait, DBUS_TYPE_INVALID));
 
-    if (synchronous == FALSE) {
+    if (op->synchronous == FALSE) {
         free(job);
         return pcmk_dbus_send(msg, upstart_proxy, upstart_async_dispatch, op);
     }
@@ -545,7 +554,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
         dbus_message_unref(reply);
     }
 
-    if (synchronous == FALSE) {
+    if (op->synchronous == FALSE) {
         operation_finalize(op);
         return TRUE;
     }
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index f3abfdb..7075b9f 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -874,6 +874,12 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
     if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
         recurring = 0;
         /* do nothing */
+
+    } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) {
+        /* Not registered == inactive */
+        cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
+        cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
+
     } else if (rc) {
         /* Attempt to map return codes to op status if possible */
         switch (rc) {
@@ -884,6 +890,7 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
                 cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
                 break;
             default:
+                /* TODO: This looks wrong.  Status should be _DONE and exec_rc set to an error */
                 cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
         }
     } else {
diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in
index b6b6718..a9a32ef 100755
--- a/lrmd/regression.py.in
+++ b/lrmd/regression.py.in
@@ -240,6 +240,13 @@ class Tests:
                 self.action_timeout = " -t 5000 "
 		if self.tls:
 			self.rsc_classes.remove("stonith")
+		if "systemd" in self.rsc_classes:
+			# the lrmd_dummy_daemon requires this, we are importing it
+			# here just to guarantee it is installed before allowing this
+			# script to run. Otherwise, running without this import being
+			# available will make all the systemd tests look like they fail,
+			# which is really scary looking. I'd rather see the import fail.
+			import systemd.daemon
 
 		print "Testing "+repr(self.rsc_classes)
 
diff --git a/mcp/pacemaker.combined.upstart.in b/mcp/pacemaker.combined.upstart.in
index 9540019..6301d10 100644
--- a/mcp/pacemaker.combined.upstart.in
+++ b/mcp/pacemaker.combined.upstart.in
@@ -30,6 +30,9 @@ pre-start script
     # give it time to fail.
     sleep 2
     pidof corosync || { exit 1; }
+
+    # if you use crm_mon, uncomment the line below.
+    #start crm_mon
 end script
 
 post-start script
@@ -59,6 +62,9 @@ post-stop script
     # and invalidate above "respawn" stanza.
     #pidof crmd && killall -q -9 corosync
 
+    # if you use crm_mon, uncomment the line below.
+    #stop crm_mon
+
     # if you use corosync-notifyd, uncomment the line below.
     #stop corosync-notifyd || true
 end script
diff --git a/pacemaker.spec.in b/pacemaker.spec.in
index bee6bfc..597fb3a 100644
--- a/pacemaker.spec.in
+++ b/pacemaker.spec.in
@@ -283,11 +283,13 @@ make DESTDIR=%{buildroot} docdir=%{pcmk_docdir} V=1 install
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig
 mkdir -p ${RPM_BUILD_ROOT}%{_var}/lib/pacemaker/cores
 install -m 644 mcp/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker
+install -m 644 tools/crm_mon.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/crm_mon
 
 %if %{with upstart_job}
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init
 install -m 644 mcp/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf
 install -m 644 mcp/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf
+install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf
 %endif
 
 # Scripts that should be executable
@@ -395,6 +397,7 @@ exit 0
 %exclude %{_datadir}/pacemaker/tests
 
 %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
+%config(noreplace) %{_sysconfdir}/sysconfig/crm_mon
 %config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker
 %{_sbindir}/pacemakerd
 
@@ -451,6 +454,7 @@ exit 0
 %if %{with upstart_job}
 %config(noreplace) %{_sysconfdir}/init/pacemaker.conf
 %config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf
+%config(noreplace) %{_sysconfdir}/init/crm_mon.conf
 %endif
 
 %files cli
diff --git a/pengine/allocate.c b/pengine/allocate.c
index f9f9f3c..8d02d9b 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1680,16 +1680,41 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
                 action,
                 pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
                 data_set);
-
         } else if (safe_str_eq(action->task, "stop")) {
-            custom_action_order(action->rsc,
-                NULL,
-                action,
-                remote_rsc,
-                generate_op_key(remote_rsc->id, RSC_STOP, 0),
-                NULL,
-                pe_order_preserve | pe_order_implies_first,
-                data_set);
+            gboolean after_start = FALSE;
+
+            /* handle special case with baremetal remote where stop actions need to be
+             * ordered after the connection resource starts somewhere else. */
+            if (is_baremetal_remote_node(action->node)) {
+                node_t *cluster_node = remote_rsc->running_on ? remote_rsc->running_on->data : NULL;
+
+                /* if the current cluster node a baremetal connection resource
+                 * is residing on is unclean, we can't process any operations on that
+                 * remote node until after it starts somewhere else. */
+                if (cluster_node && cluster_node->details->unclean == TRUE) {
+                    after_start = TRUE;
+                }
+            }
+
+            if (after_start) {
+                custom_action_order(remote_rsc,
+                    generate_op_key(remote_rsc->id, RSC_START, 0),
+                    NULL,
+                    action->rsc,
+                    NULL,
+                    action,
+                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
+                    data_set);
+            } else {
+                custom_action_order(action->rsc,
+                    NULL,
+                    action,
+                    remote_rsc,
+                    generate_op_key(remote_rsc->id, RSC_STOP, 0),
+                    NULL,
+                    pe_order_preserve | pe_order_implies_first,
+                    data_set);
+            }
         }
     }
 }
diff --git a/pengine/regression.sh b/pengine/regression.sh
index 5f98215..bdc7d3a 100755
--- a/pengine/regression.sh
+++ b/pengine/regression.sh
@@ -762,9 +762,11 @@ echo ""
 do_test remote-startup-probes  "Baremetal remote-node startup probes"
 do_test remote-startup         "Startup a newly discovered remote-nodes with no status."
 do_test remote-fence-unclean   "Fence unclean baremetal remote-node"
+do_test remote-fence-unclean2  "Fence baremetal remote-node after cluster node fails and connection can not be recovered"
 do_test remote-move            "Move remote-node connection resource"
 do_test remote-disable         "Disable a baremetal remote-node"
 do_test remote-orphaned        "Properly shutdown orphaned connection resource"
+do_test remote-recover         "Recover connection resource after cluster-node fails."
 do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
 echo ""
 test_results
diff --git a/pengine/test10/remote-fence-unclean2.dot b/pengine/test10/remote-fence-unclean2.dot
new file mode 100644
index 0000000..6cff564
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.dot
@@ -0,0 +1,10 @@
+digraph "g" {
+"all_stopped" [ style=bold color="green" fontcolor="orange"]
+"fake_stop_0 rhel7-alt4" -> "all_stopped" [ style = bold]
+"fake_stop_0 rhel7-alt4" [ style=bold color="green" fontcolor="orange"]
+"stonith 'reboot' rhel7-alt4" -> "fake_stop_0 rhel7-alt4" [ style = bold]
+"stonith 'reboot' rhel7-alt4" -> "stonith_complete" [ style = bold]
+"stonith 'reboot' rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"stonith_complete" -> "all_stopped" [ style = bold]
+"stonith_complete" [ style=bold color="green" fontcolor="orange"]
+}
diff --git a/pengine/test10/remote-fence-unclean2.exp b/pengine/test10/remote-fence-unclean2.exp
new file mode 100644
index 0000000..e58b617
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.exp
@@ -0,0 +1,49 @@
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
+  <synapse id="0">
+    <action_set>
+      <pseudo_event id="6" operation="stop" operation_key="fake_stop_0">
+        <attributes CRM_meta_name="stop" CRM_meta_timeout="20000" />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="1">
+    <action_set>
+      <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4">
+        <attributes CRM_meta_last_failure_fake="1411503989" CRM_meta_on_node="rhel7-alt4" CRM_meta_on_node_uuid="rhel7-alt4" CRM_meta_probe_complete="true" CRM_meta_stonith_action="reboot" />
+      </crm_event>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="2">
+    <action_set>
+      <pseudo_event id="7" operation="stonith_complete" operation_key="stonith_complete">
+        <attributes />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="3">
+    <action_set>
+      <pseudo_event id="1" operation="all_stopped" operation_key="all_stopped">
+        <attributes />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <pseudo_event id="6" operation="stop" operation_key="fake_stop_0"/>
+      </trigger>
+      <trigger>
+        <pseudo_event id="7" operation="stonith_complete" operation_key="stonith_complete"/>
+      </trigger>
+    </inputs>
+  </synapse>
+</transition_graph>
diff --git a/pengine/test10/remote-fence-unclean2.scores b/pengine/test10/remote-fence-unclean2.scores
new file mode 100644
index 0000000..10fc7fd
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.scores
@@ -0,0 +1,13 @@
+Allocation scores:
+native_color: fake allocation score on rhel7-alt1: 0
+native_color: fake allocation score on rhel7-alt2: 0
+native_color: fake allocation score on rhel7-alt3: 0
+native_color: fake allocation score on rhel7-alt4: INFINITY
+native_color: rhel7-alt4 allocation score on rhel7-alt1: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt2: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt3: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt4: -INFINITY
+native_color: shooter allocation score on rhel7-alt1: 0
+native_color: shooter allocation score on rhel7-alt2: 0
+native_color: shooter allocation score on rhel7-alt3: 0
+native_color: shooter allocation score on rhel7-alt4: -INFINITY
diff --git a/pengine/test10/remote-fence-unclean2.summary b/pengine/test10/remote-fence-unclean2.summary
new file mode 100644
index 0000000..bfaf77b
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.summary
@@ -0,0 +1,30 @@
+
+Current cluster status:
+Node rhel7-alt1 (1): standby
+Node rhel7-alt2 (2): standby
+RemoteNode rhel7-alt4: UNCLEAN (offline)
+OFFLINE: [ rhel7-alt3 ]
+
+ shooter	(stonith:fence_xvm):	Stopped 
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
+
+Transition Summary:
+ * Stop    fake	(rhel7-alt4)
+
+Executing cluster transition:
+ * Fencing rhel7-alt4 (reboot)
+ * Pseudo action:   stonith_complete
+ * Pseudo action:   fake_stop_0
+ * Pseudo action:   all_stopped
+
+Revised cluster status:
+Node rhel7-alt1 (1): standby
+Node rhel7-alt2 (2): standby
+OFFLINE: [ rhel7-alt3 ]
+RemoteOFFLINE: [ rhel7-alt4 ]
+
+ shooter	(stonith:fence_xvm):	Stopped 
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
+ fake	(ocf::heartbeat:Dummy):	Stopped 
+
diff --git a/pengine/test10/remote-fence-unclean2.xml b/pengine/test10/remote-fence-unclean2.xml
new file mode 100644
index 0000000..78fc4f1
--- /dev/null
+++ b/pengine/test10/remote-fence-unclean2.xml
@@ -0,0 +1,115 @@
+<cib crm_feature_set="3.0.9" validate-with="pacemaker-2.1" epoch="13" num_updates="8" admin_epoch="0" cib-last-written="Tue Sep 23 16:28:22 2014" have-quorum="1" dc-uuid="2">
+  <configuration>
+    <crm_config>
+      <cluster_property_set id="cib-bootstrap-options">
+        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.12-6da3f72"/>
+        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
+        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="phd"/>
+        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1411504087"/>
+      </cluster_property_set>
+    </crm_config>
+    <nodes>
+      <node id="1" uname="rhel7-alt1">
+        <instance_attributes id="nodes-1">
+          <nvpair id="nodes-1-standby" name="standby" value="on"/>
+        </instance_attributes>
+      </node>
+      <node id="2" uname="rhel7-alt2">
+        <instance_attributes id="nodes-2">
+          <nvpair id="nodes-2-standby" name="standby" value="on"/>
+        </instance_attributes>
+      </node>
+      <node id="3" uname="rhel7-alt3"/>
+    </nodes>
+    <resources>
+      <primitive class="stonith" id="shooter" type="fence_xvm">
+        <instance_attributes id="shooter-instance_attributes"/>
+        <operations>
+          <op id="shooter-monitor-interval-60s" interval="60s" name="monitor"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="rhel7-alt4" provider="pacemaker" type="remote">
+        <instance_attributes id="rhel7-alt4-instance_attributes"/>
+        <operations>
+          <op id="rhel7-alt4-start-timeout-15" interval="0s" name="start" timeout="15"/>
+          <op id="rhel7-alt4-stop-timeout-15" interval="0s" name="stop" timeout="15"/>
+          <op id="rhel7-alt4-monitor-timeout-15" interval="60s" name="monitor" timeout="15"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="fake" provider="heartbeat" type="Dummy">
+        <instance_attributes id="fake-instance_attributes"/>
+        <operations>
+          <op id="fake-start-timeout-20" interval="0s" name="start" timeout="20"/>
+          <op id="fake-stop-timeout-20" interval="0s" name="stop" timeout="20"/>
+          <op id="fake-monitor-interval-10" interval="10" name="monitor" timeout="20"/>
+        </operations>
+      </primitive>
+    </resources>
+    <constraints>
+      <rsc_location id="location-fake-rhel7-alt4-INFINITY" node="rhel7-alt4" rsc="fake" score="INFINITY"/>
+    </constraints>
+  </configuration>
+  <status>
+    <node_state id="2" uname="rhel7-alt2" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <transient_attributes id="2">
+        <instance_attributes id="status-2">
+          <nvpair id="status-2-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-2-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+      <lrm id="2">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="14" rc-code="0" op-status="0" interval="0" last-run="1411503701" last-rc-change="1411503701" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="7" op-status="0" interval="0" last-run="1411504086" last-rc-change="1411504086" exec-time="34" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+    </node_state>
+    <node_state id="1" uname="rhel7-alt1" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <lrm id="1">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="0" op-status="0" interval="0" last-run="1411504102" last-rc-change="1411504102" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+            <lrm_rsc_op id="shooter_monitor_60000" operation_key="shooter_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="12" rc-code="0" op-status="0" interval="60000" last-rc-change="1411504079" exec-time="10" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="1">
+        <instance_attributes id="status-1">
+          <nvpair id="status-1-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-1-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+    <node_state id="3" uname="rhel7-alt3" in_ccm="false" crmd="offline" crm-debug-origin="send_stonith_update" join="down" expected="down"/>
+    <node_state id="rhel7-alt4" remote_node="true" uname="rhel7-alt4" crm-debug-origin="post_cache_update">
+      <lrm id="rhel7-alt4">
+        <lrm_resources>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_failure_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt3" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_monitor_10000" operation_key="fake_monitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="35" rc-code="0" op-status="0" interval="10000" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt3"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="rhel7-alt4">
+        <instance_attributes id="status-rhel7-alt4">
+          <nvpair id="status-rhel7-alt4-probe_complete" name="probe_complete" value="true"/>
+          <nvpair id="status-rhel7-alt4-last-failure-fake" name="last-failure-fake" value="1411503989"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+  </status>
+</cib>
diff --git a/pengine/test10/remote-recover.dot b/pengine/test10/remote-recover.dot
new file mode 100644
index 0000000..1da6a7b
--- /dev/null
+++ b/pengine/test10/remote-recover.dot
@@ -0,0 +1,17 @@
+ digraph "g" {
+"all_stopped" [ style=bold color="green" fontcolor="orange"]
+"fake_monitor_10000 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"fake_start_0 rhel7-alt4" -> "fake_monitor_10000 rhel7-alt4" [ style = bold]
+"fake_start_0 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"fake_stop_0 rhel7-alt4" -> "all_stopped" [ style = bold]
+"fake_stop_0 rhel7-alt4" -> "fake_start_0 rhel7-alt4" [ style = bold]
+"fake_stop_0 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
+"rhel7-alt4_monitor_60000 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+"rhel7-alt4_start_0 rhel7-alt1" -> "fake_monitor_10000 rhel7-alt4" [ style = bold]
+"rhel7-alt4_start_0 rhel7-alt1" -> "fake_start_0 rhel7-alt4" [ style = bold]
+"rhel7-alt4_start_0 rhel7-alt1" -> "rhel7-alt4_monitor_60000 rhel7-alt1" [ style = bold]
+"rhel7-alt4_start_0 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+"shooter_monitor_60000 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+"shooter_start_0 rhel7-alt1" -> "shooter_monitor_60000 rhel7-alt1" [ style = bold]
+"shooter_start_0 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/pengine/test10/remote-recover.exp b/pengine/test10/remote-recover.exp
new file mode 100644
index 0000000..37e4f71
--- /dev/null
+++ b/pengine/test10/remote-recover.exp
@@ -0,0 +1,99 @@
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
+  <synapse id="0">
+    <action_set>
+      <rsc_op id="8" operation="monitor" operation_key="shooter_monitor_60000" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="shooter" class="stonith" type="fence_xvm"/>
+        <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="7" operation="start" operation_key="shooter_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="1">
+    <action_set>
+      <rsc_op id="7" operation="start" operation_key="shooter_start_0" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="shooter" class="stonith" type="fence_xvm"/>
+        <attributes CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="2">
+    <action_set>
+      <rsc_op id="10" operation="monitor" operation_key="rhel7-alt4_monitor_60000" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="rhel7-alt4" class="ocf" provider="pacemaker" type="remote"/>
+        <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="15000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="3">
+    <action_set>
+      <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1">
+        <primitive id="rhel7-alt4" class="ocf" provider="pacemaker" type="remote"/>
+        <attributes CRM_meta_name="start" CRM_meta_timeout="15000" />
+      </rsc_op>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="4">
+    <action_set>
+      <rsc_op id="13" operation="monitor" operation_key="fake_monitor_10000" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
+        <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+      <trigger>
+        <rsc_op id="12" operation="start" operation_key="fake_start_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="5">
+    <action_set>
+      <rsc_op id="12" operation="start" operation_key="fake_start_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
+        <attributes CRM_meta_name="start" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
+      </trigger>
+      <trigger>
+        <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+  <synapse id="6">
+    <action_set>
+      <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
+        <attributes CRM_meta_name="stop" CRM_meta_timeout="20000" />
+      </rsc_op>
+    </action_set>
+    <inputs/>
+  </synapse>
+  <synapse id="7">
+    <action_set>
+      <pseudo_event id="1" operation="all_stopped" operation_key="all_stopped">
+        <attributes />
+      </pseudo_event>
+    </action_set>
+    <inputs>
+      <trigger>
+        <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
+      </trigger>
+    </inputs>
+  </synapse>
+</transition_graph>
diff --git a/pengine/test10/remote-recover.scores b/pengine/test10/remote-recover.scores
new file mode 100644
index 0000000..10fc7fd
--- /dev/null
+++ b/pengine/test10/remote-recover.scores
@@ -0,0 +1,13 @@
+Allocation scores:
+native_color: fake allocation score on rhel7-alt1: 0
+native_color: fake allocation score on rhel7-alt2: 0
+native_color: fake allocation score on rhel7-alt3: 0
+native_color: fake allocation score on rhel7-alt4: INFINITY
+native_color: rhel7-alt4 allocation score on rhel7-alt1: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt2: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt3: 0
+native_color: rhel7-alt4 allocation score on rhel7-alt4: -INFINITY
+native_color: shooter allocation score on rhel7-alt1: 0
+native_color: shooter allocation score on rhel7-alt2: 0
+native_color: shooter allocation score on rhel7-alt3: 0
+native_color: shooter allocation score on rhel7-alt4: -INFINITY
diff --git a/pengine/test10/remote-recover.summary b/pengine/test10/remote-recover.summary
new file mode 100644
index 0000000..8fd7480
--- /dev/null
+++ b/pengine/test10/remote-recover.summary
@@ -0,0 +1,36 @@
+
+Current cluster status:
+Node rhel7-alt2 (2): standby
+RemoteNode rhel7-alt4: UNCLEAN (offline)
+Online: [ rhel7-alt1 ]
+OFFLINE: [ rhel7-alt3 ]
+
+ shooter	(stonith:fence_xvm):	Stopped 
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
+
+Transition Summary:
+ * Start   shooter	(rhel7-alt1)
+ * Start   rhel7-alt4	(rhel7-alt1)
+ * Restart fake	(Started rhel7-alt4)
+
+Executing cluster transition:
+ * Resource action: shooter         start on rhel7-alt1
+ * Resource action: rhel7-alt4      start on rhel7-alt1
+ * Resource action: fake            stop on rhel7-alt4
+ * Pseudo action:   all_stopped
+ * Resource action: shooter         monitor=60000 on rhel7-alt1
+ * Resource action: rhel7-alt4      monitor=60000 on rhel7-alt1
+ * Resource action: fake            start on rhel7-alt4
+ * Resource action: fake            monitor=10000 on rhel7-alt4
+
+Revised cluster status:
+Node rhel7-alt2 (2): standby
+Online: [ rhel7-alt1 ]
+OFFLINE: [ rhel7-alt3 ]
+RemoteOnline: [ rhel7-alt4 ]
+
+ shooter	(stonith:fence_xvm):	Started rhel7-alt1 
+ rhel7-alt4	(ocf::pacemaker:remote):	Started rhel7-alt1 
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
+
diff --git a/pengine/test10/remote-recover.xml b/pengine/test10/remote-recover.xml
new file mode 100644
index 0000000..1a83dd9
--- /dev/null
+++ b/pengine/test10/remote-recover.xml
@@ -0,0 +1,114 @@
+<cib crm_feature_set="3.0.9" validate-with="pacemaker-2.1" epoch="13" num_updates="8" admin_epoch="0" cib-last-written="Tue Sep 23 16:28:22 2014" have-quorum="1" dc-uuid="2">
+  <configuration>
+    <crm_config>
+      <cluster_property_set id="cib-bootstrap-options">
+        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.12-6da3f72"/>
+        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
+        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="phd"/>
+        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1411504087"/>
+      </cluster_property_set>
+    </crm_config>
+    <nodes>
+      <node id="1" uname="rhel7-alt1">
+        <instance_attributes id="nodes-1">
+        </instance_attributes>
+      </node>
+      <node id="2" uname="rhel7-alt2">
+        <instance_attributes id="nodes-2">
+          <nvpair id="nodes-2-standby" name="standby" value="on"/>
+        </instance_attributes>
+      </node>
+      <node id="3" uname="rhel7-alt3"/>
+    </nodes>
+    <resources>
+      <primitive class="stonith" id="shooter" type="fence_xvm">
+        <instance_attributes id="shooter-instance_attributes"/>
+        <operations>
+          <op id="shooter-monitor-interval-60s" interval="60s" name="monitor"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="rhel7-alt4" provider="pacemaker" type="remote">
+        <instance_attributes id="rhel7-alt4-instance_attributes"/>
+        <operations>
+          <op id="rhel7-alt4-start-timeout-15" interval="0s" name="start" timeout="15"/>
+          <op id="rhel7-alt4-stop-timeout-15" interval="0s" name="stop" timeout="15"/>
+          <op id="rhel7-alt4-monitor-timeout-15" interval="60s" name="monitor" timeout="15"/>
+        </operations>
+      </primitive>
+      <primitive class="ocf" id="fake" provider="heartbeat" type="Dummy">
+        <instance_attributes id="fake-instance_attributes"/>
+        <operations>
+          <op id="fake-start-timeout-20" interval="0s" name="start" timeout="20"/>
+          <op id="fake-stop-timeout-20" interval="0s" name="stop" timeout="20"/>
+          <op id="fake-monitor-interval-10" interval="10" name="monitor" timeout="20"/>
+        </operations>
+      </primitive>
+    </resources>
+    <constraints>
+      <rsc_location id="location-fake-rhel7-alt4-INFINITY" node="rhel7-alt4" rsc="fake" score="INFINITY"/>
+    </constraints>
+  </configuration>
+  <status>
+    <node_state id="2" uname="rhel7-alt2" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <transient_attributes id="2">
+        <instance_attributes id="status-2">
+          <nvpair id="status-2-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-2-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+      <lrm id="2">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="14" rc-code="0" op-status="0" interval="0" last-run="1411503701" last-rc-change="1411503701" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="7" op-status="0" interval="0" last-run="1411504086" last-rc-change="1411504086" exec-time="34" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+    </node_state>
+    <node_state id="1" uname="rhel7-alt1" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
+      <lrm id="1">
+        <lrm_resources>
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="0" op-status="0" interval="0" last-run="1411504102" last-rc-change="1411504102" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+            <lrm_rsc_op id="shooter_monitor_60000" operation_key="shooter_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="12" rc-code="0" op-status="0" interval="60000" last-rc-change="1411504079" exec-time="10" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
+          </lrm_resource>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="1">
+        <instance_attributes id="status-1">
+          <nvpair id="status-1-shutdown" name="shutdown" value="0"/>
+          <nvpair id="status-1-probe_complete" name="probe_complete" value="true"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+    <node_state id="3" uname="rhel7-alt3" in_ccm="false" crmd="offline" crm-debug-origin="send_stonith_update" join="down" expected="down"/>
+    <node_state id="rhel7-alt4" remote_node="true" uname="rhel7-alt4" crm-debug-origin="post_cache_update">
+      <lrm id="rhel7-alt4">
+        <lrm_resources>
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
+            <lrm_rsc_op id="fake_last_failure_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt3" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+            <lrm_rsc_op id="fake_monitor_10000" operation_key="fake_monitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="35" rc-code="0" op-status="0" interval="10000" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt3"/>
+          </lrm_resource>
+        </lrm_resources>
+      </lrm>
+      <transient_attributes id="rhel7-alt4">
+        <instance_attributes id="status-rhel7-alt4">
+          <nvpair id="status-rhel7-alt4-probe_complete" name="probe_complete" value="true"/>
+          <nvpair id="status-rhel7-alt4-last-failure-fake" name="last-failure-fake" value="1411503989"/>
+        </instance_attributes>
+      </transient_attributes>
+    </node_state>
+  </status>
+</cib>
diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c
index 60d39b6..c37b096 100644
--- a/tools/crm_attribute.c
+++ b/tools/crm_attribute.c
@@ -235,6 +235,7 @@ main(int argc, char **argv)
         /* we're updating cluster options - dont populate dest_node */
         type = XML_CIB_TAG_CRMCONFIG;
 
+    } else if (safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) {
     } else if (safe_str_neq(type, XML_CIB_TAG_TICKETS)) {
         if (dest_uname == NULL) {
             dest_uname = get_node_name(0);
diff --git a/tools/crm_mon.upstart.in b/tools/crm_mon.upstart.in
new file mode 100644
index 0000000..ef0fe7a
--- /dev/null
+++ b/tools/crm_mon.upstart.in
@@ -0,0 +1,39 @@
+# crm_mon - Daemon for pacemaker monitor
+#
+#
+
+kill timeout 3600
+respawn
+respawn limit 10 3600
+
+expect fork
+
+env prog=crm_mon
+env rpm_sysconf=@sysconfdir@/sysconfig/crm_mon
+env rpm_lockfile=@localstatedir@/lock/subsys/crm_mon
+env deb_sysconf=@sysconfdir@/default/crm_mon
+env deb_lockfile=@localstatedir@/lock/crm_mon
+
+
+script
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
+    exec $prog $OPTIONS
+end script
+
+post-start script
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
+    touch $LOCK_FILE
+end script
+
+post-stop script
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
+    rm -f $LOCK_FILE
+end script
+
diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index 6537520..56583e0 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -2214,11 +2214,15 @@ main(int argc, char **argv)
         }
 
     } else if (rsc_cmd == 'C') {
-#if 0
+#if HAVE_ATOMIC_ATTRD
         xmlNode *cmd = create_request(CRM_OP_REPROBE, NULL, host_uname,
                                       CRM_SYSTEM_CRMD, crm_system_name, our_pid);
 
-        crm_debug("Re-checking the state of all resources on %s", host_uname);
+        crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes");
+
+        rc = attrd_update_delegate(
+            NULL, 'u', host_uname, "fail-count-*", NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, FALSE);
+
         if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) {
             start_mainloop();
         }