Blame SOURCES/pacemaker-rollup-be1e835.patch

1f016a
commit 0fa5ce2c14fa36610630469c14c07537eb4f4807
1f016a
Author: Andrew Beekhof <andrew@beekhof.net>
1f016a
Date:   Wed Oct 1 16:56:59 2014 +1000
1f016a
1f016a
    Import: pacemaker-rollup-be1e835
1f016a
1f016a
diff --git a/attrd/Makefile.am b/attrd/Makefile.am
1f016a
index 802a3fa..9d5e223 100644
1f016a
--- a/attrd/Makefile.am
1f016a
+++ b/attrd/Makefile.am
1f016a
@@ -32,25 +32,12 @@ attrd_LDADD	= $(top_builddir)/lib/cluster/libcrmcluster.la 		\
1f016a
 		$(top_builddir)/lib/cib/libcib.la			\
1f016a
 		$(CLUSTERLIBS)
1f016a
 
1f016a
-if BUILD_HEARTBEAT_SUPPORT
1f016a
-attrd_SOURCES	+= legacy.c
1f016a
-else
1f016a
-
1f016a
-if BUILD_CS_SUPPORT
1f016a
-
1f016a
-if BUILD_CS_PLUGIN
1f016a
-attrd_SOURCES	+= legacy.c
1f016a
-else
1f016a
-# Only build the new version where CPG is exclusively used for communication
1f016a
+if BUILD_ATOMIC_ATTRD
1f016a
 attrd_SOURCES	+= main.c commands.c
1f016a
-endif
1f016a
-
1f016a
 else
1f016a
 attrd_SOURCES	+= legacy.c
1f016a
 endif
1f016a
 
1f016a
-endif
1f016a
-
1f016a
 clean-generic:
1f016a
 	rm -f *.log *.debug *.xml *~
1f016a
 
1f016a
diff --git a/attrd/commands.c b/attrd/commands.c
1f016a
index 038e7e4..c48ef1b 100644
1f016a
--- a/attrd/commands.c
1f016a
+++ b/attrd/commands.c
1f016a
@@ -17,6 +17,8 @@
1f016a
  */
1f016a
 #include <crm_internal.h>
1f016a
 
1f016a
+#include <sys/types.h>
1f016a
+#include <regex.h>
1f016a
 #include <glib.h>
1f016a
 
1f016a
 #include <crm/msg_xml.h>
1f016a
@@ -63,7 +65,7 @@ typedef struct attribute_value_s {
1f016a
 
1f016a
 void write_attribute(attribute_t *a);
1f016a
 void write_or_elect_attribute(attribute_t *a);
1f016a
-void attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter);
1f016a
+void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
1f016a
 void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
1f016a
 void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source);
1f016a
 
1f016a
@@ -191,16 +193,41 @@ attrd_client_message(crm_client_t *client, xmlNode *xml)
1f016a
         char *host = crm_element_value_copy(xml, F_ATTRD_HOST);
1f016a
         const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
1f016a
         const char *value = crm_element_value(xml, F_ATTRD_VALUE);
1f016a
+        const char *regex = crm_element_value(xml, F_ATTRD_REGEX);
1f016a
 
1f016a
-        a = g_hash_table_lookup(attributes, attr);
1f016a
+        if(attr == NULL && regex) {
1f016a
+            GHashTableIter aIter;
1f016a
+            regex_t *r_patt = calloc(1, sizeof(regex_t));
1f016a
+
1f016a
+            crm_debug("Setting %s to %s", regex, value);
1f016a
+            if (regcomp(r_patt, regex, REG_EXTENDED)) {
1f016a
+                crm_err("Bad regex '%s' for update", regex);
1f016a
+                regfree(r_patt);
1f016a
+                free(r_patt);
1f016a
+                return;
1f016a
+            }
1f016a
 
1f016a
-        if(host == NULL) {
1f016a
+            g_hash_table_iter_init(&aIter, attributes);
1f016a
+            while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
1f016a
+                int status = regexec(r_patt, attr, 0, NULL, 0);
1f016a
+
1f016a
+                if(status == 0) {
1f016a
+                    crm_trace("Matched %s with %s", attr, regex);
1f016a
+                    crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr);
1f016a
+                    send_attrd_message(NULL, xml);
1f016a
+                }
1f016a
+            }
1f016a
+            return;
1f016a
+
1f016a
+        } else if(host == NULL) {
1f016a
             crm_trace("Inferring host");
1f016a
             host = strdup(attrd_cluster->uname);
1f016a
             crm_xml_add(xml, F_ATTRD_HOST, host);
1f016a
             crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid);
1f016a
         }
1f016a
 
1f016a
+        a = g_hash_table_lookup(attributes, attr);
1f016a
+
1f016a
         if (value) {
1f016a
             int offset = 1;
1f016a
             int int_value = 0;
1f016a
@@ -254,6 +281,7 @@ attrd_client_message(crm_client_t *client, xmlNode *xml)
1f016a
     }
1f016a
 
1f016a
     if(broadcast) {
1f016a
+        /* Ends up at attrd_peer_message() */
1f016a
         send_attrd_message(NULL, xml);
1f016a
     }
1f016a
 }
1f016a
@@ -265,6 +293,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
1f016a
     const char *v = crm_element_value(xml, F_ATTRD_VERSION);
1f016a
     const char *op = crm_element_value(xml, F_ATTRD_TASK);
1f016a
     const char *election_op = crm_element_value(xml, F_CRM_TASK);
1f016a
+    const char *host = crm_element_value(xml, F_ATTRD_HOST);
1f016a
 
1f016a
     if(election_op) {
1f016a
         enum election_result rc = 0;
1f016a
@@ -293,7 +322,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
1f016a
             const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
1f016a
 
1f016a
             crm_trace("Compatibility update of %s from %s", name, peer->uname);
1f016a
-            attrd_peer_update(peer, xml, FALSE);
1f016a
+            attrd_peer_update(peer, xml, host, FALSE);
1f016a
 
1f016a
         } else if(safe_str_eq(op, "flush")) {
1f016a
             const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
1f016a
@@ -336,13 +365,12 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
1f016a
     }
1f016a
 
1f016a
     if(safe_str_eq(op, "update")) {
1f016a
-        attrd_peer_update(peer, xml, FALSE);
1f016a
+        attrd_peer_update(peer, xml, host, FALSE);
1f016a
 
1f016a
     } else if(safe_str_eq(op, "sync")) {
1f016a
         attrd_peer_sync(peer, xml);
1f016a
 
1f016a
     } else if(safe_str_eq(op, "peer-remove")) {
1f016a
-        const char *host = crm_element_value(xml, F_ATTRD_HOST);
1f016a
         attrd_peer_remove(0, host, TRUE, peer->uname);
1f016a
 
1f016a
     } else if(safe_str_eq(op, "sync-response")
1f016a
@@ -351,7 +379,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
1f016a
 
1f016a
         crm_notice("Processing %s from %s", op, peer->uname);
1f016a
         for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
1f016a
-            attrd_peer_update(peer, child, TRUE);
1f016a
+            host = crm_element_value(child, F_ATTRD_HOST);
1f016a
+            attrd_peer_update(peer, child, host, TRUE);
1f016a
         }
1f016a
     }
1f016a
 }
1f016a
@@ -409,12 +438,11 @@ attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const cha
1f016a
 }
1f016a
 
1f016a
 void
1f016a
-attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter)
1f016a
+attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
1f016a
 {
1f016a
     bool changed = FALSE;
1f016a
     attribute_value_t *v = NULL;
1f016a
 
1f016a
-    const char *host = crm_element_value(xml, F_ATTRD_HOST);
1f016a
     const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
1f016a
     const char *value = crm_element_value(xml, F_ATTRD_VALUE);
1f016a
 
1f016a
@@ -424,6 +452,19 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, bool filter)
1f016a
         a = create_attribute(xml);
1f016a
     }
1f016a
 
1f016a
+    if(host == NULL) {
1f016a
+        GHashTableIter vIter;
1f016a
+        g_hash_table_iter_init(&vIter, a->values);
1f016a
+
1f016a
+        crm_debug("Setting %s for all hosts to %s", attr, value);
1f016a
+
1f016a
+        xml_remove_prop(xml, F_ATTRD_HOST_ID);
1f016a
+        while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
1f016a
+            attrd_peer_update(peer, xml, host, filter);
1f016a
+        }
1f016a
+        return;
1f016a
+    }
1f016a
+
1f016a
     v = g_hash_table_lookup(a->values, host);
1f016a
 
1f016a
     if(v == NULL) {
1f016a
diff --git a/cib/messages.c b/cib/messages.c
1f016a
index 4b79912..9c66349 100644
1f016a
--- a/cib/messages.c
1f016a
+++ b/cib/messages.c
1f016a
@@ -292,6 +292,11 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml
1f016a
             crm_xml_add(up, F_TYPE, "cib");
1f016a
             crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE);
1f016a
             crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version));
1f016a
+            crm_xml_add(up, F_CIB_DELEGATED, host);
1f016a
+            crm_xml_add(up, F_CIB_CLIENTID, crm_element_value(req, F_CIB_CLIENTID));
1f016a
+            crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS));
1f016a
+            crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID));
1f016a
+
1f016a
             send_cluster_message(NULL, crm_msg_cib, up, FALSE);
1f016a
             free_xml(up);
1f016a
 
1f016a
diff --git a/configure.ac b/configure.ac
1f016a
index 40adffe..1edff40 100644
1f016a
--- a/configure.ac
1f016a
+++ b/configure.ac
1f016a
@@ -75,6 +75,7 @@ CC_IN_CONFIGURE=yes
1f016a
 export CC_IN_CONFIGURE
1f016a
 
1f016a
 LDD=ldd
1f016a
+BUILD_ATOMIC_ATTRD=1
1f016a
 
1f016a
 dnl ========================================================================
1f016a
 dnl Compiler characteristics
1f016a
@@ -1260,6 +1261,7 @@ case $SUPPORT_HEARTBEAT in
1f016a
 	dnl objdump -x ${libdir}/libccmclient.so | grep SONAME | awk '{print $2}'
1f016a
 	AC_DEFINE_UNQUOTED(CCM_LIBRARY, "libccmclient.so.1", Library to load for ccm support)
1f016a
 	AC_DEFINE_UNQUOTED(HEARTBEAT_LIBRARY, "libhbclient.so.1", Library to load for heartbeat support)
1f016a
+	BUILD_ATOMIC_ATTRD=0
1f016a
    else
1f016a
 	SUPPORT_HEARTBEAT=0
1f016a
    fi
1f016a
@@ -1341,6 +1343,7 @@ SUPPORT_PLUGIN=0
1f016a
 if test $SUPPORT_CS = 1 -a x$HAVE_confdb = x1; then
1f016a
     dnl Need confdb to support cman and the plugins
1f016a
     SUPPORT_PLUGIN=1
1f016a
+    BUILD_ATOMIC_ATTRD=0
1f016a
     LCRSODIR=`$PKGCONFIG corosync --variable=lcrsodir`
1f016a
     STACKS="$STACKS corosync-plugin"
1f016a
     COROSYNC_LIBS="$COROSYNC_LIBS $confdb_LIBS"
1f016a
@@ -1382,6 +1385,9 @@ AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
1f016a
 AM_CONDITIONAL(BUILD_CS_PLUGIN, test $SUPPORT_PLUGIN = 1)
1f016a
 AM_CONDITIONAL(BUILD_CMAN, test $SUPPORT_CMAN = 1)
1f016a
 
1f016a
+AM_CONDITIONAL(BUILD_ATOMIC_ATTRD, test $BUILD_ATOMIC_ATTRD = 1)
1f016a
+AC_DEFINE_UNQUOTED(HAVE_ATOMIC_ATTRD, $BUILD_ATOMIC_ATTRD, Support the new atomic attrd)
1f016a
+
1f016a
 AC_SUBST(SUPPORT_CMAN)
1f016a
 AC_SUBST(SUPPORT_CS)
1f016a
 
1f016a
@@ -1401,6 +1407,9 @@ else
1f016a
     PCMK_FEATURES="$PCMK_FEATURES $STACKS"
1f016a
 fi
1f016a
 
1f016a
+if test ${BUILD_ATOMIC_ATTRD} = 1; then
1f016a
+    PCMK_FEATURES="$PCMK_FEATURES atomic-attrd"
1f016a
+fi
1f016a
 AC_SUBST(CLUSTERLIBS)
1f016a
 AC_SUBST(LCRSODIR)
1f016a
 
1f016a
@@ -1871,6 +1880,7 @@ tools/Makefile							\
1f016a
 	tools/crm_report					\
1f016a
         tools/report.common                                     \
1f016a
 	tools/cibsecret						\
1f016a
+	tools/crm_mon.upstart					\
1f016a
 xml/Makefile							\
1f016a
 lib/gnu/Makefile						\
1f016a
 		)
1f016a
diff --git a/crmd/lrm.c b/crmd/lrm.c
1f016a
index db0bffb..44634fb 100644
1f016a
--- a/crmd/lrm.c
1f016a
+++ b/crmd/lrm.c
1f016a
@@ -1162,7 +1162,7 @@ get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg,
1f016a
         if (!rsc) {
1f016a
             fsa_data_t *msg_data = NULL;
1f016a
 
1f016a
-            crm_err("Could not add resource %s to LRM", id);
1f016a
+            crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
1f016a
             register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
1f016a
         }
1f016a
     }
1f016a
@@ -1175,13 +1175,17 @@ delete_resource(lrm_state_t * lrm_state,
1f016a
                 const char *id,
1f016a
                 lrmd_rsc_info_t * rsc,
1f016a
                 GHashTableIter * gIter,
1f016a
-                const char *sys, const char *host, const char *user, ha_msg_input_t * request)
1f016a
+                const char *sys,
1f016a
+                const char *host,
1f016a
+                const char *user,
1f016a
+                ha_msg_input_t * request,
1f016a
+                gboolean unregister)
1f016a
 {
1f016a
     int rc = pcmk_ok;
1f016a
 
1f016a
     crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
1f016a
 
1f016a
-    if (rsc) {
1f016a
+    if (rsc && unregister) {
1f016a
         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
1f016a
     }
1f016a
 
1f016a
@@ -1224,6 +1228,7 @@ do_lrm_invoke(long long action,
1f016a
     const char *user_name = NULL;
1f016a
     const char *target_node = NULL;
1f016a
     gboolean is_remote_node = FALSE;
1f016a
+    gboolean crm_rsc_delete = FALSE;
1f016a
 
1f016a
     if (input->xml != NULL) {
1f016a
         /* Remote node operations are routed here to their remote connections */
1f016a
@@ -1259,6 +1264,8 @@ do_lrm_invoke(long long action,
1f016a
     crm_trace("LRM command from: %s", from_sys);
1f016a
 
1f016a
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
1f016a
+        /* remember this delete op came from crm_resource */
1f016a
+        crm_rsc_delete = TRUE;
1f016a
         operation = CRMD_ACTION_DELETE;
1f016a
 
1f016a
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
1f016a
@@ -1370,13 +1377,17 @@ do_lrm_invoke(long long action,
1f016a
     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
1f016a
         GHashTableIter gIter;
1f016a
         rsc_history_t *entry = NULL;
1f016a
+        gboolean unregister = is_remote_lrmd_ra(NULL, NULL, entry->id) ? FALSE : TRUE;
1f016a
 
1f016a
         crm_notice("Forcing the status of all resources to be redetected");
1f016a
 
1f016a
         g_hash_table_iter_init(&gIter, lrm_state->resource_history);
1f016a
         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1f016a
+            /* only unregister the resource during a reprobe if it is not a remote connection
1f016a
+             * resource. otherwise unregistering the connection will terminate remote-node
1f016a
+             * membership */
1f016a
             delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
1f016a
-                            user_name, NULL);
1f016a
+                            user_name, NULL, unregister);
1f016a
         }
1f016a
 
1f016a
         /* Now delete the copy in the CIB */
1f016a
@@ -1499,6 +1510,7 @@ do_lrm_invoke(long long action,
1f016a
             free(op_key);
1f016a
 
1f016a
         } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) {
1f016a
+            gboolean unregister = TRUE;
1f016a
 
1f016a
 #if ENABLE_ACL
1f016a
             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
1f016a
@@ -1523,7 +1535,11 @@ do_lrm_invoke(long long action,
1f016a
                 return;
1f016a
             }
1f016a
 #endif
1f016a
-            delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input);
1f016a
+            if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1f016a
+                unregister = FALSE;
1f016a
+            }
1f016a
+
1f016a
+            delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
1f016a
 
1f016a
         } else if (rsc != NULL) {
1f016a
             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
1f016a
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
1f016a
index 98f59c8..f3dedeb 100644
1f016a
--- a/crmd/remote_lrmd_ra.c
1f016a
+++ b/crmd/remote_lrmd_ra.c
1f016a
@@ -251,6 +251,8 @@ connection_takeover_timeout_cb(gpointer data)
1f016a
     crm_debug("takeover event timed out for node %s", cmd->rsc_id);
1f016a
     cmd->takeover_timeout_id = 0;
1f016a
 
1f016a
+    lrm_state = lrm_state_find(cmd->rsc_id);
1f016a
+
1f016a
     handle_remote_ra_stop(lrm_state, cmd);
1f016a
     free_cmd(cmd);
1f016a
 
1f016a
@@ -379,6 +381,11 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
1f016a
             cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
1f016a
 
1f016a
         } else {
1f016a
+
1f016a
+            if (safe_str_eq(cmd->action, "start")) {
1f016a
+                /* clear PROBED value if it happens to be set after start completes. */
1f016a
+                update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
1f016a
+            }
1f016a
             lrm_state_reset_tables(lrm_state);
1f016a
             cmd->rc = PCMK_OCF_OK;
1f016a
             cmd->op_status = PCMK_LRM_OP_DONE;
1f016a
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
1f016a
index 926996b..a3aa78b 100644
1f016a
--- a/crmd/te_actions.c
1f016a
+++ b/crmd/te_actions.c
1f016a
@@ -546,17 +546,26 @@ te_update_job_count(crm_action_t * action, int offset)
1f016a
         return;
1f016a
     }
1f016a
 
1f016a
-    if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
1f016a
+    /* if we have a router node, this means the action is performing
1f016a
+     * on a remote node. For now, we count all action occuring on a
1f016a
+     * remote node against the job list on the cluster node hosting
1f016a
+     * the connection resources */
1f016a
+    target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
1f016a
+
1f016a
+    if ((target == NULL) &&
1f016a
+        (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
1f016a
+
1f016a
         const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
1f016a
         const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
1f016a
 
1f016a
         te_update_job_count_on(t1, offset, TRUE);
1f016a
         te_update_job_count_on(t2, offset, TRUE);
1f016a
-
1f016a
-    } else {
1f016a
-
1f016a
-        te_update_job_count_on(target, offset, FALSE);
1f016a
+        return;
1f016a
+    } else if (target == NULL) {
1f016a
+        target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
1f016a
     }
1f016a
+
1f016a
+    te_update_job_count_on(target, offset, FALSE);
1f016a
 }
1f016a
 
1f016a
 static gboolean
1f016a
@@ -597,6 +606,8 @@ te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const ch
1f016a
         }
1f016a
     }
1f016a
 
1f016a
+    crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
1f016a
+
1f016a
     return TRUE;
1f016a
 }
1f016a
 
1f016a
@@ -611,7 +622,15 @@ te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
1f016a
         return TRUE;
1f016a
     }
1f016a
 
1f016a
-    if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
1f016a
+    /* if we have a router node, this means the action is performing
1f016a
+     * on a remote node. For now, we count all action occuring on a
1f016a
+     * remote node against the job list on the cluster node hosting
1f016a
+     * the connection resources */
1f016a
+    target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
1f016a
+
1f016a
+    if ((target == NULL) &&
1f016a
+        (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
1f016a
+
1f016a
         target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
1f016a
         if(te_should_perform_action_on(graph, action, target) == FALSE) {
1f016a
             return FALSE;
1f016a
@@ -619,7 +638,7 @@ te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
1f016a
 
1f016a
         target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
1f016a
 
1f016a
-    } else {
1f016a
+    } else if (target == NULL) {
1f016a
         target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
1f016a
     }
1f016a
 
1f016a
diff --git a/crmd/te_events.c b/crmd/te_events.c
1f016a
index afe3072..b81a13e 100644
1f016a
--- a/crmd/te_events.c
1f016a
+++ b/crmd/te_events.c
1f016a
@@ -161,10 +161,6 @@ update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int targe
1f016a
         do_update = TRUE;
1f016a
         value = failed_stop_offset;
1f016a
 
1f016a
-    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
1f016a
-        do_update = TRUE;
1f016a
-        value = failed_stop_offset;
1f016a
-
1f016a
     } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
1f016a
         do_update = TRUE;
1f016a
 
1f016a
diff --git a/crmd/throttle.c b/crmd/throttle.c
1f016a
index 04a3cf1..6e853ae 100644
1f016a
--- a/crmd/throttle.c
1f016a
+++ b/crmd/throttle.c
1f016a
@@ -430,7 +430,7 @@ throttle_mode(void)
1f016a
     unsigned int blocked = 0;
1f016a
     enum throttle_state_e mode = throttle_none;
1f016a
 
1f016a
-#ifndef ON_SOLARIS
1f016a
+#ifdef ON_SOLARIS
1f016a
     return throttle_none;
1f016a
 #endif
1f016a
 
1f016a
@@ -508,44 +508,41 @@ static void
1f016a
 throttle_send_command(enum throttle_state_e mode)
1f016a
 {
1f016a
     xmlNode *xml = NULL;
1f016a
+    static enum throttle_state_e last = -1;
1f016a
 
1f016a
-    xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
1f016a
-    crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
1f016a
-    crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
1f016a
+    if(mode != last) {
1f016a
+        crm_info("New throttle mode: %.4x (was %.4x)", mode, last);
1f016a
+        last = mode;
1f016a
 
1f016a
-    send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
1f016a
-    free_xml(xml);
1f016a
+        xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
1f016a
+        crm_xml_add_int(xml, F_CRM_THROTTLE_MODE, mode);
1f016a
+        crm_xml_add_int(xml, F_CRM_THROTTLE_MAX, throttle_job_max);
1f016a
 
1f016a
-    crm_info("Updated throttle state to %.4x", mode);
1f016a
+        send_cluster_message(NULL, crm_msg_crmd, xml, TRUE);
1f016a
+        free_xml(xml);
1f016a
+    }
1f016a
 }
1f016a
 
1f016a
 static gboolean
1f016a
 throttle_timer_cb(gpointer data)
1f016a
 {
1f016a
     static bool send_updates = FALSE;
1f016a
-    static enum throttle_state_e last = -1;
1f016a
-
1f016a
     enum throttle_state_e now = throttle_none;
1f016a
 
1f016a
-    if(send_updates == FALSE) {
1f016a
-        /* Optimize for the true case */
1f016a
-        if(compare_version(fsa_our_dc_version, "3.0.8") < 0) {
1f016a
-            crm_trace("DC version %s doesn't support throttling", fsa_our_dc_version);
1f016a
-
1f016a
-        } else {
1f016a
-            send_updates = TRUE;
1f016a
-        }
1f016a
-    }
1f016a
-
1f016a
     if(send_updates) {
1f016a
         now = throttle_mode();
1f016a
-    }
1f016a
+        throttle_send_command(now);
1f016a
+
1f016a
+    } else if(compare_version(fsa_our_dc_version, "3.0.8") < 0) {
1f016a
+        /* Optimize for the true case */
1f016a
+        crm_trace("DC version %s doesn't support throttling", fsa_our_dc_version);
1f016a
 
1f016a
-    if(send_updates && now != last) {
1f016a
-        crm_debug("New throttle mode: %.4x (was %.4x)", now, last);
1f016a
+    } else {
1f016a
+        send_updates = TRUE;
1f016a
+        now = throttle_mode();
1f016a
         throttle_send_command(now);
1f016a
-        last = now;
1f016a
     }
1f016a
+
1f016a
     return TRUE;
1f016a
 }
1f016a
 
1f016a
@@ -595,9 +592,11 @@ throttle_update_job_max(const char *preference)
1f016a
 void
1f016a
 throttle_init(void)
1f016a
 {
1f016a
-    throttle_records = g_hash_table_new_full(
1f016a
-        crm_str_hash, g_str_equal, NULL, throttle_record_free);
1f016a
-    throttle_timer = mainloop_timer_add("throttle", 30* 1000, TRUE, throttle_timer_cb, NULL);
1f016a
+    if(throttle_records == NULL) {
1f016a
+        throttle_records = g_hash_table_new_full(
1f016a
+            crm_str_hash, g_str_equal, NULL, throttle_record_free);
1f016a
+        throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
1f016a
+    }
1f016a
 
1f016a
     throttle_update_job_max(NULL);
1f016a
     mainloop_timer_start(throttle_timer);
1f016a
diff --git a/cts/CTS.py b/cts/CTS.py
1f016a
index 04189f2..f4198c4 100644
1f016a
--- a/cts/CTS.py
1f016a
+++ b/cts/CTS.py
1f016a
@@ -225,10 +225,13 @@ class CtsLab:
1f016a
 
1f016a
 class NodeStatus:
1f016a
     def __init__(self, env):
1f016a
-        pass
1f016a
+        self.Env = env
1f016a
 
1f016a
     def IsNodeBooted(self, node):
1f016a
         '''Return TRUE if the given node is booted (responds to pings)'''
1f016a
+        if self.Env["docker"]:
1f016a
+            return RemoteFactory().getInstance()("localhost", "docker inspect --format {{.State.Running}} %s | grep -q true" % node, silent=True) == 0
1f016a
+
1f016a
         return RemoteFactory().getInstance()("localhost", "ping -nq -c1 -w1 %s" % node, silent=True) == 0
1f016a
 
1f016a
     def IsSshdUp(self, node):
1f016a
@@ -442,6 +445,9 @@ class ClusterManager(UserDict):
1f016a
             self.debug("Quorum: %d Len: %d" % (q, len(self.Env["nodes"])))
1f016a
             return peer_list
1f016a
 
1f016a
+        for n in self.Env["nodes"]:
1f016a
+            peer_state[n] = "unknown"
1f016a
+
1f016a
         # Now see if any states need to be updated
1f016a
         self.debug("looking for: " + repr(stonith.regexes))
1f016a
         shot = stonith.look(0)
1f016a
@@ -457,7 +463,8 @@ class ClusterManager(UserDict):
1f016a
                     peer_state[peer] = "complete"
1f016a
                     self.__instance_errorstoignore.append(self.templates["Pat:Fencing_ok"] % peer)
1f016a
 
1f016a
-                elif re.search(self.templates["Pat:Fencing_start"] % n, shot):
1f016a
+                elif peer_state[n] != "complete" and re.search(self.templates["Pat:Fencing_start"] % n, shot):
1f016a
+                    # TODO: Correctly detect multiple fencing operations for the same host
1f016a
                     peer = n
1f016a
                     peer_state[peer] = "in-progress"
1f016a
                     self.__instance_errorstoignore.append(self.templates["Pat:Fencing_start"] % peer)
1f016a
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
1f016a
index 314c347..9b336a5 100755
1f016a
--- a/cts/CTSlab.py
1f016a
+++ b/cts/CTSlab.py
1f016a
@@ -107,9 +107,9 @@ if __name__ == '__main__':
1f016a
 
1f016a
     if Environment["ListTests"] == 1:
1f016a
         Tests = TestList(cm, Audits)
1f016a
-        Environment.log("Total %d tests"%len(Tests))
1f016a
+        LogFactory().log("Total %d tests"%len(Tests))
1f016a
         for test in Tests :
1f016a
-            Environment.log(str(test.name));
1f016a
+            LogFactory().log(str(test.name));
1f016a
         sys.exit(0)
1f016a
 
1f016a
     elif len(Environment["tests"]) == 0:
1f016a
diff --git a/cts/CTStests.py b/cts/CTStests.py
1f016a
index 918dff0..cd5b7ce 100644
1f016a
--- a/cts/CTStests.py
1f016a
+++ b/cts/CTStests.py
1f016a
@@ -83,6 +83,7 @@ class CTSTest:
1f016a
         self.passed = 1
1f016a
         self.is_loop = 0
1f016a
         self.is_unsafe = 0
1f016a
+        self.is_docker_unsafe = 0
1f016a
         self.is_experimental = 0
1f016a
         self.is_container = 0
1f016a
         self.is_valgrind = 0
1f016a
@@ -224,6 +225,8 @@ class CTSTest:
1f016a
             return 0
1f016a
         elif self.is_experimental and not self.Env["experimental-tests"]:
1f016a
             return 0
1f016a
+        elif self.is_docker_unsafe and self.Env["docker"]:
1f016a
+            return 0
1f016a
         elif self.is_container and not self.Env["container-tests"]:
1f016a
             return 0
1f016a
         elif self.Env["benchmark"] and self.benchmark == 0:
1f016a
@@ -1359,6 +1362,8 @@ class ComponentFail(CTSTest):
1f016a
     def __init__(self, cm):
1f016a
         CTSTest.__init__(self,cm)
1f016a
         self.name = "ComponentFail"
1f016a
+        # TODO make this work correctly in docker.
1f016a
+        self.is_docker_unsafe = 1
1f016a
         self.startall = SimulStartLite(cm)
1f016a
         self.complist = cm.Components()
1f016a
         self.patterns = []
1f016a
@@ -1419,6 +1424,15 @@ class ComponentFail(CTSTest):
1f016a
                 self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
1f016a
                 self.okerrpatterns.append(self.templates["Pat:ChildExit"])
1f016a
 
1f016a
+        if chosen.name == "stonith":
1f016a
+            # Ignore actions for STONITH resources
1f016a
+            (rc, lines) = self.rsh(node, "crm_resource -c", None)
1f016a
+            for line in lines:
1f016a
+                if re.search("^Resource", line):
1f016a
+                    r = AuditResource(self.CM, line)
1f016a
+                    if r.rclass == "stonith":
1f016a
+                        self.okerrpatterns.append(self.templates["LogActions: Recover.*%s"] % r.id)
1f016a
+
1f016a
         # supply a copy so self.patterns doesnt end up empty
1f016a
         tmpPats = []
1f016a
         tmpPats.extend(self.patterns)
1f016a
@@ -2512,6 +2526,7 @@ class RemoteLXC(CTSTest):
1f016a
         self.startall = SimulStartLite(cm)
1f016a
         self.num_containers = 2
1f016a
         self.is_container = 1
1f016a
+        self.is_docker_unsafe = 1
1f016a
         self.failed = 0
1f016a
         self.fail_string = ""
1f016a
 
1f016a
@@ -2624,6 +2639,7 @@ class RemoteBaremetal(CTSTest):
1f016a
     def __init__(self, cm):
1f016a
         CTSTest.__init__(self,cm)
1f016a
         self.name = "RemoteBaremetal"
1f016a
+        self.is_docker_unsafe = 1
1f016a
         self.start = StartTest(cm)
1f016a
         self.startall = SimulStartLite(cm)
1f016a
         self.stop = StopTest(cm)
1f016a
diff --git a/cts/environment.py b/cts/environment.py
1f016a
index de1d099..d741452 100644
1f016a
--- a/cts/environment.py
1f016a
+++ b/cts/environment.py
1f016a
@@ -71,6 +71,7 @@ class Environment:
1f016a
         self["loop-tests"] = 1
1f016a
         self["scenario"] = "random"
1f016a
         self["stats"] = 0
1f016a
+        self["docker"] = 0
1f016a
 
1f016a
         self.RandomGen = random.Random()
1f016a
         self.logger = LogFactory()
1f016a
@@ -143,7 +144,9 @@ class Environment:
1f016a
                 # GoodThing(tm).
1f016a
                 try:
1f016a
                     n = node.strip()
1f016a
-                    gethostbyname_ex(n)
1f016a
+                    if self.data["docker"] == 0:
1f016a
+                        gethostbyname_ex(n)
1f016a
+
1f016a
                     self.Nodes.append(n) 
1f016a
                 except:
1f016a
                     self.logger.log(node+" not found in DNS... aborting")
1f016a
@@ -191,7 +194,10 @@ class Environment:
1f016a
             return "crm-lha"
1f016a
 
1f016a
         elif self.data["Stack"] == "corosync 2.x":
1f016a
-            return "crm-mcp"
1f016a
+            if self["docker"]:
1f016a
+                return "crm-mcp-docker"
1f016a
+            else:
1f016a
+                return "crm-mcp"
1f016a
 
1f016a
         elif self.data["Stack"] == "corosync (cman)":
1f016a
             return "crm-cman"
1f016a
@@ -342,6 +348,10 @@ class Environment:
1f016a
             elif args[i] == "--qarsh":
1f016a
                 RemoteFactory().enable_qarsh()
1f016a
 
1f016a
+            elif args[i] == "--docker":
1f016a
+                self["docker"] = 1
1f016a
+                RemoteFactory().enable_docker()
1f016a
+
1f016a
             elif args[i] == "--stonith" or args[i] == "--fencing":
1f016a
                 skipthis=1
1f016a
                 if args[i+1] == "1" or args[i+1] == "yes":
1f016a
@@ -352,6 +362,9 @@ class Environment:
1f016a
                     self["DoStonith"]=1
1f016a
                     self["stonith-type"] = "fence_xvm"
1f016a
                     self["stonith-params"] = "pcmk_arg_map=domain:uname,delay=0"
1f016a
+                elif args[i+1] == "docker":
1f016a
+                    self["DoStonith"]=1
1f016a
+                    self["stonith-type"] = "fence_docker_cts"
1f016a
                 elif args[i+1] == "scsi":
1f016a
                     self["DoStonith"]=1
1f016a
                     self["stonith-type"] = "fence_scsi"
1f016a
@@ -644,6 +657,7 @@ class Environment:
1f016a
         print "\t [--container-tests]          include pacemaker_remote tests that run in lxc container resources"
1f016a
         print "\t [--oprofile 'node list']     list of cluster nodes to run oprofile on]"
1f016a
         print "\t [--qarsh]                    use the QARSH backdoor to access nodes instead of SSH"
1f016a
+        print "\t [--docker]                   Indicates nodes are docker nodes."
1f016a
         print "\t [--seed random_seed]"
1f016a
         print "\t [--set option=value]"
1f016a
         print "\t "
1f016a
diff --git a/cts/lxc_autogen.sh.in b/cts/lxc_autogen.sh.in
1f016a
index 6900b67..e11532b 100755
1f016a
--- a/cts/lxc_autogen.sh.in
1f016a
+++ b/cts/lxc_autogen.sh.in
1f016a
@@ -72,6 +72,7 @@ if [ $verify -eq 1 ]; then
1f016a
 	virsh -c lxc:/// list --all > /dev/null 2>&1
1f016a
 	if [ $? -ne 0 ]; then
1f016a
 		echo "Could not connect 'virsh -c lxc:///' check that libvirt lxc driver is installed"
1f016a
+		# yum install -y libvirt-daemon-driver-lxc libvirt-daemon-lxc libvirt-login-shell
1f016a
 		exit 1
1f016a
 	fi
1f016a
 
1f016a
diff --git a/cts/patterns.py b/cts/patterns.py
1f016a
index f651965..8d34e1c 100644
1f016a
--- a/cts/patterns.py
1f016a
+++ b/cts/patterns.py
1f016a
@@ -364,9 +364,12 @@ class crm_cs_v0(BasePatterns):
1f016a
         self.components["stonith-ignore"] = [
1f016a
             "LogActions: Recover Fencing",
1f016a
             "Updating failcount for Fencing",
1f016a
+            "error: crm_ipc_read: Connection to stonith-ng failed",
1f016a
+            "error: mainloop_gio_callback: Connection to stonith-ng.*closed (I/O condition=17)",
1f016a
+            "crit: tengine_stonith_connection_destroy: Fencing daemon connection failed",
1f016a
             "error: te_connect_stonith:.*Sign-in failed: triggered a retry",
1f016a
             "STONITH connection failed, finalizing .* pending operations.",
1f016a
-            "process_lrm_event:.*Operation Fencing.* Error"
1f016a
+            "process_lrm_event:.*Operation Fencing.* Error",
1f016a
         ]
1f016a
         self.components["stonith-ignore"].extend(self.components["common-ignore"])
1f016a
 
1f016a
@@ -409,6 +412,20 @@ class crm_mcp(crm_cs_v0):
1f016a
 #                "Pat:We_stopped"   : "%s.*Stopped Corosync Cluster Engine",
1f016a
 #            })
1f016a
 
1f016a
+class crm_mcp_docker(crm_mcp):
1f016a
+    '''
1f016a
+    The crm version 4 cluster manager class.
1f016a
+    It implements the things we need to talk to and manipulate
1f016a
+    crm clusters running on top of native corosync (no plugins)
1f016a
+    '''
1f016a
+    def __init__(self, name):
1f016a
+        crm_mcp.__init__(self, name)
1f016a
+
1f016a
+        self.commands.update({
1f016a
+            "StartCmd"       : "pcmk_start",
1f016a
+            "StopCmd"        : "pcmk_stop",
1f016a
+        })
1f016a
+
1f016a
 class crm_cman(crm_cs_v0):
1f016a
     '''
1f016a
     The crm version 3 cluster manager class.
1f016a
@@ -454,6 +471,8 @@ class PatternSelector:
1f016a
             crm_cman(name)
1f016a
         elif name == "crm-mcp":
1f016a
             crm_mcp(name)
1f016a
+        elif name == "crm-mcp-docker":
1f016a
+            crm_mcp_docker(name)
1f016a
 
1f016a
     def get_variant(self, variant):
1f016a
         if patternvariants.has_key(variant):
1f016a
diff --git a/cts/remote.py b/cts/remote.py
1f016a
index c8253c3..7920fc9 100644
1f016a
--- a/cts/remote.py
1f016a
+++ b/cts/remote.py
1f016a
@@ -261,6 +261,12 @@ class RemoteFactory:
1f016a
     def new(self, silent=False):
1f016a
         return RemoteExec(RemoteFactory.rsh, silent)
1f016a
 
1f016a
+    def enable_docker(self):
1f016a
+        print "Using DOCKER backend for connections to cluster nodes"
1f016a
+
1f016a
+        RemoteFactory.rsh.Command = "/usr/libexec/phd/docker/phd_docker_remote_cmd "
1f016a
+        RemoteFactory.rsh.CpCommand = "/usr/libexec/phd/docker/phd_docker_cp"
1f016a
+
1f016a
     def enable_qarsh(self):
1f016a
         # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/
1f016a
         print "Using QARSH for connections to cluster nodes"
1f016a
diff --git a/cts/watcher.py b/cts/watcher.py
1f016a
index d33e580..5e6ee43 100644
1f016a
--- a/cts/watcher.py
1f016a
+++ b/cts/watcher.py
1f016a
@@ -165,7 +165,11 @@ class FileObj(SearchObj):
1f016a
             global log_watcher_bin
1f016a
 
1f016a
             self.debug("Installing %s on %s" % (log_watcher_bin, host))
1f016a
-            self.rsh(host, '''echo "%s" > %s''' % (log_watcher, log_watcher_bin), silent=True)
1f016a
+
1f016a
+            os.system("cat << END >> %s\n%s\nEND" %(log_watcher_bin, log_watcher))
1f016a
+            os.system("chmod 755 %s" %(log_watcher_bin))
1f016a
+
1f016a
+            self.rsh.cp(log_watcher_bin, "root@%s:%s" % (host, log_watcher_bin))
1f016a
             has_log_watcher[host] = 1
1f016a
 
1f016a
         self.harvest()
1f016a
@@ -176,7 +180,8 @@ class FileObj(SearchObj):
1f016a
             if match:
1f016a
                 last_offset = self.offset
1f016a
                 self.offset = match.group(1)
1f016a
-                #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(lines), self.offset))
1f016a
+                #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(outLines), self.offset))
1f016a
+                self.debug("Got %d lines, new offset: %s  %s" % (len(outLines), self.offset, repr(self.delegate)))
1f016a
 
1f016a
             elif re.search("^CTSwatcher:.*truncated", line):
1f016a
                 self.log(line)
1f016a
@@ -199,7 +204,7 @@ class FileObj(SearchObj):
1f016a
 
1f016a
         global log_watcher_bin
1f016a
         return self.rsh.call_async(self.host,
1f016a
-                "python %s -t %s -p CTSwatcher: -l 200 -f %s -o %s" % (log_watcher_bin, self.name, self.filename, self.offset),
1f016a
+                                   "python %s -t %s -p CTSwatcher: -l 200 -f %s -o %s -t %s" % (log_watcher_bin, self.name, self.filename, self.offset, self.name),
1f016a
                 completionDelegate=self)
1f016a
 
1f016a
     def setend(self):
1f016a
@@ -208,7 +213,7 @@ class FileObj(SearchObj):
1f016a
 
1f016a
         global log_watcher_bin
1f016a
         (rc, lines) = self.rsh(self.host,
1f016a
-                 "python %s -t %s -p CTSwatcher: -l 2 -f %s -o %s" % (log_watcher_bin, self.name, self.filename, "EOF"),
1f016a
+                               "python %s -t %s -p CTSwatcher: -l 2 -f %s -o %s -t %s" % (log_watcher_bin, self.name, self.filename, "EOF", self.name),
1f016a
                  None, silent=True)
1f016a
 
1f016a
         for line in lines:
1f016a
@@ -386,7 +391,7 @@ class LogWatcher(RemoteExec):
1f016a
 
1f016a
     def async_complete(self, pid, returncode, outLines, errLines):
1f016a
         # TODO: Probably need a lock for updating self.line_cache
1f016a
-        self.logger.debug("%s: Got %d lines from %d" % (self.name, len(outLines), pid))
1f016a
+        self.logger.debug("%s: Got %d lines from %d (total %d)" % (self.name, len(outLines), pid, len(self.line_cache)))
1f016a
         if len(outLines):
1f016a
             self.cache_lock.acquire()
1f016a
             self.line_cache.extend(outLines)
1f016a
@@ -407,7 +412,7 @@ class LogWatcher(RemoteExec):
1f016a
         for t in pending:
1f016a
             t.join(60.0)
1f016a
             if t.isAlive():
1f016a
-                self.logger.log("%s: Aborting after 20s waiting for %d logging commands" % (self.name, repr(t)))
1f016a
+                self.logger.log("%s: Aborting after 20s waiting for %s logging commands" % (self.name, repr(t)))
1f016a
                 return
1f016a
 
1f016a
         #print "Got %d lines" % len(self.line_cache)
1f016a
@@ -484,9 +489,6 @@ class LogWatcher(RemoteExec):
1f016a
                 if len(self.line_cache) == 0 and end < time.time():
1f016a
                     self.debug("Single search terminated: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines))
1f016a
                     return None
1f016a
-                elif len(self.line_cache) == 0:
1f016a
-                    self.debug("Single search timed out: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines))
1f016a
-                    return None
1f016a
                 else:
1f016a
                     self.debug("Waiting: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), len(self.line_cache)))
1f016a
                     time.sleep(1)
1f016a
@@ -520,6 +522,7 @@ class LogWatcher(RemoteExec):
1f016a
                 self.unmatched = self.regexes
1f016a
                 self.matched = returnresult
1f016a
                 self.regexes = save_regexes
1f016a
+                self.end()
1f016a
                 return None
1f016a
 
1f016a
             returnresult.append(oneresult)
1f016a
diff --git a/extra/resources/remote b/extra/resources/remote
1f016a
index 9e0482b..9f141a2 100644
1f016a
--- a/extra/resources/remote
1f016a
+++ b/extra/resources/remote
1f016a
@@ -62,11 +62,11 @@ meta_data() {
1f016a
     </parameter>
1f016a
   </parameters>
1f016a
   <actions>
1f016a
-    <action name="start"   timeout="15" />
1f016a
-    <action name="stop"    timeout="15" />
1f016a
-    <action name="monitor"    timeout="15" />
1f016a
-    <action name="migrate_to"   timeout="15" />
1f016a
-    <action name="migrate_from" timeout="15" />
1f016a
+    <action name="start"   timeout="40" />
1f016a
+    <action name="stop"    timeout="40" />
1f016a
+    <action name="monitor"    timeout="30" />
1f016a
+    <action name="migrate_to"   timeout="60" />
1f016a
+    <action name="migrate_from" timeout="60" />
1f016a
     <action name="meta-data"  timeout="5" />
1f016a
   </actions>
1f016a
 </resource-agent>
1f016a
diff --git a/fencing/commands.c b/fencing/commands.c
1f016a
index a4e9f30..577ea95 100644
1f016a
--- a/fencing/commands.c
1f016a
+++ b/fencing/commands.c
1f016a
@@ -1094,7 +1094,10 @@ stonith_device_action(xmlNode * msg, char **output)
1f016a
         device = g_hash_table_lookup(device_list, id);
1f016a
     }
1f016a
 
1f016a
-    if (device) {
1f016a
+    if (device && device->api_registered == FALSE) {
1f016a
+        rc = -ENODEV;
1f016a
+
1f016a
+    } else if (device) {
1f016a
         cmd = create_async_command(msg);
1f016a
         if (cmd == NULL) {
1f016a
             free_device(device);
1f016a
diff --git a/fencing/main.c b/fencing/main.c
1f016a
index 5ae36cf..b03659e 100644
1f016a
--- a/fencing/main.c
1f016a
+++ b/fencing/main.c
1f016a
@@ -415,7 +415,7 @@ topology_remove_helper(const char *node, int level)
1f016a
     xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL);
1f016a
     xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_DEL);
1f016a
 
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
     crm_xml_add_int(data, XML_ATTR_ID, level);
1f016a
     crm_xml_add(data, F_STONITH_TARGET, node);
1f016a
 
1f016a
diff --git a/include/crm/services.h b/include/crm/services.h
1f016a
index e8bc172..5310709 100644
1f016a
--- a/include/crm/services.h
1f016a
+++ b/include/crm/services.h
1f016a
@@ -152,6 +152,7 @@ enum nagios_exitcode {
1f016a
         int status;
1f016a
         int sequence;
1f016a
         int expected_rc;
1f016a
+        int synchronous;
1f016a
 
1f016a
         char *stderr_data;
1f016a
         char *stdout_data;
1f016a
diff --git a/include/crm_internal.h b/include/crm_internal.h
1f016a
index ba78da2..3eb88de 100644
1f016a
--- a/include/crm_internal.h
1f016a
+++ b/include/crm_internal.h
1f016a
@@ -220,7 +220,7 @@ gboolean crm_remote_recv(crm_remote_t * remote, int total_timeout /*ms */ , int
1f016a
 xmlNode *crm_remote_parse_buffer(crm_remote_t * remote);
1f016a
 int crm_remote_tcp_connect(const char *host, int port);
1f016a
 int crm_remote_tcp_connect_async(const char *host, int port, int timeout,       /*ms */
1f016a
-                                 void *userdata, void (*callback) (void *userdata, int sock));
1f016a
+                                 int *timer_id, void *userdata, void (*callback) (void *userdata, int sock));
1f016a
 
1f016a
 #  ifdef HAVE_GNUTLS_GNUTLS_H
1f016a
 /*!
1f016a
@@ -276,6 +276,7 @@ int crm_read_pidfile(const char *filename);
1f016a
 #  define attrd_channel		T_ATTRD
1f016a
 #  define F_ATTRD_KEY		"attr_key"
1f016a
 #  define F_ATTRD_ATTRIBUTE	"attr_name"
1f016a
+#  define F_ATTRD_REGEX 	"attr_regex"
1f016a
 #  define F_ATTRD_TASK		"task"
1f016a
 #  define F_ATTRD_VALUE		"attr_value"
1f016a
 #  define F_ATTRD_SET		"attr_set"
1f016a
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
1f016a
index 9410506..24700e5 100644
1f016a
--- a/lib/cluster/membership.c
1f016a
+++ b/lib/cluster/membership.c
1f016a
@@ -389,7 +389,9 @@ crm_find_peer(unsigned int id, const char *uname)
1f016a
         }
1f016a
 
1f016a
     } else if(uname && by_id->uname) {
1f016a
-        crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u", by_id->uname, by_name->uname, id);
1f016a
+        crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
1f016a
+        crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
1f016a
+        crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
1f016a
 
1f016a
     } else if(id && by_name->id) {
1f016a
         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
1f016a
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
1f016a
index c1801a4..f26225f 100644
1f016a
--- a/lib/common/ipc.c
1f016a
+++ b/lib/common/ipc.c
1f016a
@@ -806,7 +806,7 @@ crm_ipc_connect(crm_ipc_t * client)
1f016a
 
1f016a
 #ifdef HAVE_IPCS_GET_BUFFER_SIZE
1f016a
     client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc);
1f016a
-    if (client->max_buf_size < client->buf_size) {
1f016a
+    if (client->max_buf_size > client->buf_size) {
1f016a
         free(client->buffer);
1f016a
         client->buffer = calloc(1, client->max_buf_size);
1f016a
         client->buf_size = client->max_buf_size;
1f016a
diff --git a/lib/common/remote.c b/lib/common/remote.c
1f016a
index 0a7cd93..e2492b9 100644
1f016a
--- a/lib/common/remote.c
1f016a
+++ b/lib/common/remote.c
1f016a
@@ -737,11 +737,12 @@ check_connect_finished(gpointer userdata)
1f016a
 static int
1f016a
 internal_tcp_connect_async(int sock,
1f016a
                            const struct sockaddr *addr, socklen_t addrlen, int timeout /* ms */ ,
1f016a
-                           void *userdata, void (*callback) (void *userdata, int sock))
1f016a
+                           int *timer_id, void *userdata, void (*callback) (void *userdata, int sock))
1f016a
 {
1f016a
     int rc = 0;
1f016a
     int flag = 0;
1f016a
     int interval = 500;
1f016a
+    int timer;
1f016a
     struct tcp_async_cb_data *cb_data = NULL;
1f016a
 
1f016a
     if ((flag = fcntl(sock, F_GETFL)) >= 0) {
1f016a
@@ -782,7 +783,10 @@ internal_tcp_connect_async(int sock,
1f016a
      * Something about the way mainloop is currently polling prevents this from working at the
1f016a
      * moment though. */
1f016a
     crm_trace("fd %d: scheduling to check if connect finished in %dms second", sock, interval);
1f016a
-    g_timeout_add(interval, check_connect_finished, cb_data);
1f016a
+    timer = g_timeout_add(interval, check_connect_finished, cb_data);
1f016a
+    if (timer_id) {
1f016a
+        *timer_id = timer;
1f016a
+    }
1f016a
 
1f016a
     return 0;
1f016a
 }
1f016a
@@ -809,10 +813,11 @@ internal_tcp_connect(int sock, const struct sockaddr *addr, socklen_t addrlen)
1f016a
  * \internal
1f016a
  * \brief tcp connection to server at specified port
1f016a
  * \retval negative, failed to connect.
1f016a
+ * \retval positive, sock fd
1f016a
  */
1f016a
 int
1f016a
-crm_remote_tcp_connect_async(const char *host, int port, int timeout,   /*ms */
1f016a
-                             void *userdata, void (*callback) (void *userdata, int sock))
1f016a
+crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */
1f016a
+                             int *timer_id, void *userdata, void (*callback) (void *userdata, int sock))
1f016a
 {
1f016a
     char buffer[256];
1f016a
     struct addrinfo *res = NULL;
1f016a
@@ -877,8 +882,7 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout,   /*ms */
1f016a
 
1f016a
         if (callback) {
1f016a
             if (internal_tcp_connect_async
1f016a
-                (sock, rp->ai_addr, rp->ai_addrlen, timeout, userdata, callback) == 0) {
1f016a
-                sock = 0;
1f016a
+                (sock, rp->ai_addr, rp->ai_addrlen, timeout, timer_id, userdata, callback) == 0) {
1f016a
                 goto async_cleanup; /* Success for now, we'll hear back later in the callback */
1f016a
             }
1f016a
 
1f016a
@@ -903,5 +907,5 @@ async_cleanup:
1f016a
 int
1f016a
 crm_remote_tcp_connect(const char *host, int port)
1f016a
 {
1f016a
-    return crm_remote_tcp_connect_async(host, port, -1, NULL, NULL);
1f016a
+    return crm_remote_tcp_connect_async(host, port, -1, NULL, NULL, NULL);
1f016a
 }
1f016a
diff --git a/lib/common/utils.c b/lib/common/utils.c
1f016a
index e559c51..dc54e6d 100644
1f016a
--- a/lib/common/utils.c
1f016a
+++ b/lib/common/utils.c
1f016a
@@ -2005,6 +2005,9 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha
1f016a
     }
1f016a
 
1f016a
     switch (command) {
1f016a
+        case 'u':
1f016a
+            crm_xml_add(update, F_ATTRD_TASK, "update");
1f016a
+            crm_xml_add(update, F_ATTRD_REGEX, name);
1f016a
         case 'D':
1f016a
         case 'U':
1f016a
         case 'v':
1f016a
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
1f016a
index 2837682..06b9492 100644
1f016a
--- a/lib/fencing/st_client.c
1f016a
+++ b/lib/fencing/st_client.c
1f016a
@@ -192,7 +192,7 @@ create_device_registration_xml(const char *id, const char *namespace, const char
1f016a
 #endif
1f016a
 
1f016a
     crm_xml_add(data, XML_ATTR_ID, id);
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
     crm_xml_add(data, "agent", agent);
1f016a
     crm_xml_add(data, "namespace", namespace);
1f016a
     if (rsc_provides) {
1f016a
@@ -229,7 +229,7 @@ stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
1f016a
     xmlNode *data = NULL;
1f016a
 
1f016a
     data = create_xml_node(NULL, F_STONITH_DEVICE);
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
     crm_xml_add(data, XML_ATTR_ID, name);
1f016a
     rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
1f016a
     free_xml(data);
1f016a
@@ -244,7 +244,7 @@ stonith_api_remove_level(stonith_t * st, int options, const char *node, int leve
1f016a
     xmlNode *data = NULL;
1f016a
 
1f016a
     data = create_xml_node(NULL, F_STONITH_LEVEL);
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
     crm_xml_add(data, F_STONITH_TARGET, node);
1f016a
     crm_xml_add_int(data, XML_ATTR_ID, level);
1f016a
     rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
1f016a
@@ -260,7 +260,7 @@ create_level_registration_xml(const char *node, int level, stonith_key_value_t *
1f016a
 
1f016a
     crm_xml_add_int(data, XML_ATTR_ID, level);
1f016a
     crm_xml_add(data, F_STONITH_TARGET, node);
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
 
1f016a
     for (; device_list; device_list = device_list->next) {
1f016a
         xmlNode *dev = create_xml_node(data, F_STONITH_DEVICE);
1f016a
@@ -1255,7 +1255,7 @@ stonith_api_query(stonith_t * stonith, int call_options, const char *target,
1f016a
     CRM_CHECK(devices != NULL, return -EINVAL);
1f016a
 
1f016a
     data = create_xml_node(NULL, F_STONITH_DEVICE);
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
     crm_xml_add(data, F_STONITH_TARGET, target);
1f016a
     crm_xml_add(data, F_STONITH_ACTION, "off");
1f016a
     rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
1f016a
@@ -1296,7 +1296,7 @@ stonith_api_call(stonith_t * stonith,
1f016a
     xmlNode *data = NULL;
1f016a
 
1f016a
     data = create_xml_node(NULL, F_STONITH_DEVICE);
1f016a
-    crm_xml_add(data, "origin", __FUNCTION__);
1f016a
+    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
1f016a
     crm_xml_add(data, F_STONITH_DEVICE, id);
1f016a
     crm_xml_add(data, F_STONITH_ACTION, action);
1f016a
     crm_xml_add(data, F_STONITH_TARGET, victim);
1f016a
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
1f016a
index 3496098..b8c5d23 100644
1f016a
--- a/lib/lrmd/lrmd_client.c
1f016a
+++ b/lib/lrmd/lrmd_client.c
1f016a
@@ -89,6 +89,9 @@ typedef struct lrmd_private_s {
1f016a
     int port;
1f016a
     gnutls_psk_client_credentials_t psk_cred_c;
1f016a
 
1f016a
+    /* while the async connection is occurring, this is the id
1f016a
+     * of the connection timeout timer. */
1f016a
+    int async_timer;
1f016a
     int sock;
1f016a
     /* since tls requires a round trip across the network for a
1f016a
      * request/reply, there are times where we just want to be able
1f016a
@@ -1101,6 +1104,8 @@ lrmd_tcp_connect_cb(void *userdata, int sock)
1f016a
     int rc = sock;
1f016a
     gnutls_datum_t psk_key = { NULL, 0 };
1f016a
 
1f016a
+    native->async_timer = 0;
1f016a
+
1f016a
     if (rc < 0) {
1f016a
         lrmd_tls_connection_destroy(lrmd);
1f016a
         crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port);
1f016a
@@ -1152,14 +1157,23 @@ lrmd_tcp_connect_cb(void *userdata, int sock)
1f016a
 static int
1f016a
 lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
1f016a
 {
1f016a
-    int rc = 0;
1f016a
+    int rc = -1;
1f016a
+    int sock = 0;
1f016a
+    int timer_id = 0;
1f016a
+
1f016a
     lrmd_private_t *native = lrmd->private;
1f016a
 
1f016a
     lrmd_gnutls_global_init();
1f016a
 
1f016a
-    rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd,
1f016a
+    sock = crm_remote_tcp_connect_async(native->server, native->port, timeout, &timer_id, lrmd,
1f016a
                                       lrmd_tcp_connect_cb);
1f016a
 
1f016a
+    if (sock != -1) {
1f016a
+        native->sock = sock;
1f016a
+        rc = 0;
1f016a
+        native->async_timer = timer_id;
1f016a
+    }
1f016a
+
1f016a
     return rc;
1f016a
 }
1f016a
 
1f016a
@@ -1319,6 +1333,11 @@ lrmd_tls_disconnect(lrmd_t * lrmd)
1f016a
         native->remote->tls_session = 0;
1f016a
     }
1f016a
 
1f016a
+    if (native->async_timer) {
1f016a
+        g_source_remove(native->async_timer);
1f016a
+        native->async_timer = 0;
1f016a
+    }
1f016a
+
1f016a
     if (native->source != NULL) {
1f016a
         /* Attached to mainloop */
1f016a
         mainloop_del_ipc_client(native->source);
1f016a
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
1f016a
index b699201..7127c12 100644
1f016a
--- a/lib/pengine/unpack.c
1f016a
+++ b/lib/pengine/unpack.c
1f016a
@@ -1756,6 +1756,7 @@ process_rsc_state(resource_t * rsc, node_t * node,
1f016a
     if (rsc->role > RSC_ROLE_STOPPED
1f016a
         && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) {
1f016a
 
1f016a
+        char *reason = NULL;
1f016a
         gboolean should_fence = FALSE;
1f016a
 
1f016a
         /* if this is a remote_node living in a container, fence the container
1f016a
@@ -1768,14 +1769,25 @@ process_rsc_state(resource_t * rsc, node_t * node,
1f016a
 
1f016a
             should_fence = TRUE;
1f016a
         } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1f016a
+            if (is_baremetal_remote_node(node) && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
1f016a
+                /* setting unseen = true means that fencing of the remote node will
1f016a
+                 * only occur if the connection resource is not going to start somewhere.
1f016a
+                 * This allows connection resources on a failed cluster-node to move to
1f016a
+                 * another node without requiring the baremetal remote nodes to be fenced
1f016a
+                 * as well. */
1f016a
+                node->details->unseen = TRUE;
1f016a
+                reason = g_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id);
1f016a
+            }
1f016a
             should_fence = TRUE;
1f016a
         }
1f016a
 
1f016a
         if (should_fence) {
1f016a
-            char *reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
1f016a
+            if (reason == NULL) {
1f016a
+               reason = g_strdup_printf("because %s is thought to be active there", rsc->id);
1f016a
+            }
1f016a
             pe_fence_node(data_set, node, reason);
1f016a
-            g_free(reason);
1f016a
         }
1f016a
+        g_free(reason);
1f016a
     }
1f016a
 
1f016a
     if (node->details->unclean) {
1f016a
@@ -1840,6 +1852,17 @@ process_rsc_state(resource_t * rsc, node_t * node,
1f016a
             break;
1f016a
     }
1f016a
 
1f016a
+    /* ensure a remote-node connection failure forces an unclean remote-node
1f016a
+     * to be fenced. By setting unseen = FALSE, the remote-node failure will
1f016a
+     * result in a fencing operation regardless if we're going to attempt to 
1f016a
+     * reconnect to the remote-node in this transition or not. */
1f016a
+    if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
1f016a
+        node_t *tmpnode = pe_find_node(data_set->nodes, rsc->id);
1f016a
+        if (tmpnode->details->unclean) {
1f016a
+            tmpnode->details->unseen = FALSE;
1f016a
+        }
1f016a
+    }
1f016a
+
1f016a
     if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1f016a
         if (is_set(rsc->flags, pe_rsc_orphan)) {
1f016a
             if (is_set(rsc->flags, pe_rsc_managed)) {
1f016a
@@ -2160,7 +2183,7 @@ unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * d
1f016a
     for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) {
1f016a
         remote = (resource_t *) gIter->data;
1f016a
         if (remote->role != RSC_ROLE_STARTED) {
1f016a
-            crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.");
1f016a
+            crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id);
1f016a
             set_bit(remote->container->flags, pe_rsc_failed);
1f016a
         }
1f016a
     }
1f016a
diff --git a/lib/services/dbus.c b/lib/services/dbus.c
1f016a
index 8b8aee1..587589c 100644
1f016a
--- a/lib/services/dbus.c
1f016a
+++ b/lib/services/dbus.c
1f016a
@@ -6,6 +6,14 @@
1f016a
 
1f016a
 #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"
1f016a
 
1f016a
+struct db_getall_data
1f016a
+{
1f016a
+        char *name;
1f016a
+        char *target;
1f016a
+        char *object;
1f016a
+        void *userdata;
1f016a
+        void (*callback)(const char *name, const char *value, void *userdata);
1f016a
+};
1f016a
 
1f016a
 static bool pcmk_dbus_error_check(DBusError *err, const char *prefix, const char *function, int line) 
1f016a
 {
1f016a
@@ -107,8 +115,9 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D
1f016a
     method = dbus_message_get_member (msg);
1f016a
 
1f016a
     // send message and get a handle for a reply
1f016a
-    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1)) { // -1 is default timeout
1f016a
+    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1/* aka. DBUS_TIMEOUT_USE_DEFAULT */)) {
1f016a
         if(error) {
1f016a
+            dbus_error_init(error);
1f016a
             error->message = "Call to dbus_connection_send_with_reply() failed";
1f016a
             error->name = "org.clusterlabs.pacemaker.SendFailed";
1f016a
         }
1f016a
@@ -126,13 +135,7 @@ DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, D
1f016a
         reply = dbus_pending_call_steal_reply(pending);
1f016a
     }
1f016a
 
1f016a
-    if(pcmk_dbus_find_error(method, pending, reply, error)) {
1f016a
-        crm_trace("Was error: '%s' '%s'", error->name, error->message);
1f016a
-        if(reply) {
1f016a
-            dbus_message_unref(reply);
1f016a
-            reply = NULL;
1f016a
-        }
1f016a
-    }
1f016a
+    pcmk_dbus_find_error(method, pending, reply, error);
1f016a
 
1f016a
     if(pending) {
1f016a
         /* free the pending message handle */
1f016a
@@ -156,7 +159,7 @@ bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
1f016a
     method = dbus_message_get_member (msg);
1f016a
 
1f016a
     // send message and get a handle for a reply
1f016a
-    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1)) { // -1 is default timeout
1f016a
+    if (!dbus_connection_send_with_reply (connection, msg, &pending, -1/* aka. DBUS_TIMEOUT_USE_DEFAULT */)) { // -1 is default timeout
1f016a
         crm_err("Send with reply failed for %s", method);
1f016a
         return FALSE;
1f016a
 
1f016a
@@ -205,65 +208,38 @@ bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected
1f016a
 
1f016a
         dbus_message_iter_init(msg, &args);
1f016a
         do_crm_log_alias(LOG_ERR, __FILE__, function, line,
1f016a
-                         "Unexepcted DBus type, expected %c instead of %c in '%s'",
1f016a
-                         expected, dtype, dbus_message_iter_get_signature(&args));
1f016a
+                         "Unexepcted DBus type, expected %c in '%s' instead of %c",
1f016a
+                         expected, dbus_message_iter_get_signature(&args), dtype);
1f016a
         return FALSE;
1f016a
     }
1f016a
 
1f016a
     return TRUE;
1f016a
 }
1f016a
 
1f016a
-char *
1f016a
-pcmk_dbus_get_property(
1f016a
-    DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name)
1f016a
+static char *
1f016a
+pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data)
1f016a
 {
1f016a
-    DBusMessage *msg;
1f016a
-    DBusMessageIter args;
1f016a
-    DBusMessageIter dict;
1f016a
-    DBusMessage *reply = NULL;
1f016a
-    /* DBusBasicValue value; */
1f016a
-    const char *method = "GetAll";
1f016a
-    char *output = NULL;
1f016a
     DBusError error;
1f016a
+    char *output = NULL;
1f016a
+    DBusMessageIter dict;
1f016a
+    DBusMessageIter args;
1f016a
 
1f016a
-        /* desc = systemd_unit_property(path, BUS_NAME ".Unit", "Description"); */
1f016a
-
1f016a
-    dbus_error_init(&error);
1f016a
-    crm_info("Calling: %s on %s", method, target);
1f016a
-    msg = dbus_message_new_method_call(target, // target for the method call
1f016a
-                                       obj, // object to call on
1f016a
-                                       BUS_PROPERTY_IFACE, // interface to call on
1f016a
-                                       method); // method name
1f016a
-
1f016a
-    if (NULL == msg) {
1f016a
-        crm_err("Call to %s failed: No message", method);
1f016a
-        return NULL;
1f016a
-    }
1f016a
-
1f016a
-    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_INVALID));
1f016a
-
1f016a
-    reply = pcmk_dbus_send_recv(msg, connection, &error);
1f016a
-    dbus_message_unref(msg);
1f016a
-
1f016a
-    if(error.name) {
1f016a
-        crm_err("Call to %s for %s failed: No reply", method, iface);
1f016a
-        return NULL;
1f016a
-
1f016a
-    } else if (!dbus_message_iter_init(reply, &args)) {
1f016a
-        crm_err("Cannot get properties for %s from %s", obj, iface);
1f016a
-        return NULL;
1f016a
+    if(pcmk_dbus_find_error("GetAll", (void*)&error, reply, &error)) {
1f016a
+        crm_err("Cannot get properties from %s for %s", data->target, data->object);
1f016a
+        goto cleanup;
1f016a
     }
1f016a
 
1f016a
+    dbus_message_iter_init(reply, &args);
1f016a
     if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) {
1f016a
-        crm_err("Call to %s failed: Message has invalid arguments", method);
1f016a
-        dbus_message_unref(reply);
1f016a
-        return NULL;
1f016a
+        crm_err("Invalid reply from %s for %s", data->target, data->object);
1f016a
+        goto cleanup;
1f016a
     }
1f016a
 
1f016a
     dbus_message_iter_recurse(&args, &dict);
1f016a
     while (dbus_message_iter_get_arg_type (&dict) != DBUS_TYPE_INVALID) {
1f016a
         DBusMessageIter sv;
1f016a
         DBusMessageIter v;
1f016a
+        DBusBasicValue name;
1f016a
         DBusBasicValue value;
1f016a
 
1f016a
         if(!pcmk_dbus_type_check(reply, &dict, DBUS_TYPE_DICT_ENTRY, __FUNCTION__, __LINE__)) {
1f016a
@@ -277,10 +253,9 @@ pcmk_dbus_get_property(
1f016a
 
1f016a
             switch(dtype) {
1f016a
                 case DBUS_TYPE_STRING:
1f016a
-                    dbus_message_iter_get_basic(&sv, &value);
1f016a
+                    dbus_message_iter_get_basic(&sv, &name);
1f016a
 
1f016a
-                    crm_trace("Got: %s", value.str);
1f016a
-                    if(strcmp(value.str, name) != 0) {
1f016a
+                    if(data->name && strcmp(name.str, data->name) != 0) {
1f016a
                         dbus_message_iter_next (&sv); /* Skip the value */
1f016a
                     }
1f016a
                     break;
1f016a
@@ -289,8 +264,17 @@ pcmk_dbus_get_property(
1f016a
                     if(pcmk_dbus_type_check(reply, &v, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) {
1f016a
                         dbus_message_iter_get_basic(&v, &value);
1f016a
 
1f016a
-                        crm_trace("Result: %s", value.str);
1f016a
-                        output = strdup(value.str);
1f016a
+                        crm_trace("Property %s[%s] is '%s'", data->object, name.str, value.str);
1f016a
+                        if(data->callback) {
1f016a
+                            data->callback(name.str, value.str, data->userdata);
1f016a
+
1f016a
+                        } else {
1f016a
+                            output = strdup(value.str);
1f016a
+                        }
1f016a
+
1f016a
+                        if(data->name) {
1f016a
+                            goto cleanup;
1f016a
+                        }
1f016a
                     }
1f016a
                     break;
1f016a
                 default:
1f016a
@@ -302,8 +286,82 @@ pcmk_dbus_get_property(
1f016a
         dbus_message_iter_next (&dict);
1f016a
     }
1f016a
 
1f016a
+  cleanup:
1f016a
+    free(data->target);
1f016a
+    free(data->object);
1f016a
+    free(data->name);
1f016a
+    free(data);
1f016a
+
1f016a
+    return output;
1f016a
+}
1f016a
+
1f016a
+static void
1f016a
+pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data)
1f016a
+{
1f016a
+    DBusMessage *reply = NULL;
1f016a
+
1f016a
+    if(pending) {
1f016a
+        reply = dbus_pending_call_steal_reply(pending);
1f016a
+    }
1f016a
+
1f016a
+    pcmk_dbus_lookup_result(reply, user_data);
1f016a
+
1f016a
+    if(reply) {
1f016a
+        dbus_message_unref(reply);
1f016a
+    }
1f016a
+}
1f016a
+
1f016a
+char *
1f016a
+pcmk_dbus_get_property(
1f016a
+    DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name,
1f016a
+    void (*callback)(const char *name, const char *value, void *userdata), void *userdata)
1f016a
+{
1f016a
+    DBusMessage *msg;
1f016a
+    const char *method = "GetAll";
1f016a
+    char *output = NULL;
1f016a
+
1f016a
+    struct db_getall_data *query_data = NULL;
1f016a
+
1f016a
+    /* char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState"); */
1f016a
+
1f016a
+    crm_debug("Calling: %s on %s", method, target);
1f016a
+    msg = dbus_message_new_method_call(target, // target for the method call
1f016a
+                                       obj, // object to call on
1f016a
+                                       BUS_PROPERTY_IFACE, // interface to call on
1f016a
+                                       method); // method name
1f016a
+
1f016a
+    if (NULL == msg) {
1f016a
+        crm_err("Call to %s failed: No message", method);
1f016a
+        return NULL;
1f016a
+    }
1f016a
+
1f016a
+    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_INVALID));
1f016a
+
1f016a
+    query_data = malloc(sizeof(struct db_getall_data));
1f016a
+    query_data->target = strdup(target);
1f016a
+    query_data->object = strdup(obj);
1f016a
+    query_data->callback = callback;
1f016a
+    query_data->userdata = userdata;
1f016a
+    query_data->name = NULL;
1f016a
+
1f016a
+    if(name) {
1f016a
+        query_data->name = strdup(name);
1f016a
+    }
1f016a
+
1f016a
+    if(query_data->callback) {
1f016a
+        pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data);
1f016a
+
1f016a
+    } else {
1f016a
+        DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL);
1f016a
+
1f016a
+        output = pcmk_dbus_lookup_result(reply, query_data);
1f016a
+        if(reply) {
1f016a
+            dbus_message_unref(reply);
1f016a
+        }
1f016a
+    }
1f016a
+
1f016a
+    dbus_message_unref(msg);
1f016a
 
1f016a
-    crm_trace("Property %s[%s] is '%s'", obj, name, output);
1f016a
     return output;
1f016a
 }
1f016a
 
1f016a
@@ -354,6 +412,14 @@ pcmk_dbus_watch_add(DBusWatch *watch, void *data){
1f016a
 }
1f016a
 
1f016a
 static void
1f016a
+pcmk_dbus_watch_toggle(DBusWatch *watch, void *data)
1f016a
+{
1f016a
+    mainloop_io_t *client = dbus_watch_get_data(watch);
1f016a
+    crm_notice("DBus client %p is now %s", client, dbus_watch_get_enabled(watch)?"enabled":"disabled");
1f016a
+}
1f016a
+
1f016a
+
1f016a
+static void
1f016a
 pcmk_dbus_watch_remove(DBusWatch *watch, void *data){
1f016a
     mainloop_io_t *client = dbus_watch_get_data(watch);
1f016a
 
1f016a
@@ -404,7 +470,7 @@ pcmk_dbus_timeout_toggle(DBusTimeout *timeout, void *data){
1f016a
 void pcmk_dbus_connection_setup_with_select(DBusConnection *c){
1f016a
 	dbus_connection_set_timeout_functions(
1f016a
             c, pcmk_dbus_timeout_add, pcmk_dbus_timeout_remove, pcmk_dbus_timeout_toggle, NULL, NULL);
1f016a
-	dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add, pcmk_dbus_watch_remove, NULL, NULL, NULL);
1f016a
+	dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add, pcmk_dbus_watch_remove, pcmk_dbus_watch_toggle, NULL, NULL);
1f016a
 	dbus_connection_set_dispatch_status_function(c, pcmk_dbus_connection_dispatch, NULL, NULL);
1f016a
 
1f016a
 	pcmk_dbus_connection_dispatch(c, dbus_connection_get_dispatch_status(c), NULL);
1f016a
diff --git a/lib/services/pcmk-dbus.h b/lib/services/pcmk-dbus.h
1f016a
index 3b7a598..ed80c5f 100644
1f016a
--- a/lib/services/pcmk-dbus.h
1f016a
+++ b/lib/services/pcmk-dbus.h
1f016a
@@ -6,7 +6,9 @@ bool pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection,
1f016a
                     void(*done)(DBusPendingCall *pending, void *user_data), void *user_data);
1f016a
 DBusMessage *pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error);
1f016a
 bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line);
1f016a
-char *pcmk_dbus_get_property(DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name);
1f016a
+char *pcmk_dbus_get_property(
1f016a
+    DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name,
1f016a
+    void (*callback)(const char *name, const char *value, void *userdata), void *userdata);
1f016a
 
1f016a
 bool pcmk_dbus_find_error(const char *method, DBusPendingCall* pending, DBusMessage *reply, DBusError *error);
1f016a
 
1f016a
diff --git a/lib/services/services.c b/lib/services/services.c
1f016a
index 7b32405..8590b56 100644
1f016a
--- a/lib/services/services.c
1f016a
+++ b/lib/services/services.c
1f016a
@@ -473,6 +473,7 @@ handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_actio
1f016a
 gboolean
1f016a
 services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
1f016a
 {
1f016a
+    op->synchronous = false;
1f016a
     if (action_callback) {
1f016a
         op->opaque->callback = action_callback;
1f016a
     }
1f016a
@@ -491,7 +492,7 @@ services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *
1f016a
     }
1f016a
     if (op->standard && strcasecmp(op->standard, "systemd") == 0) {
1f016a
 #if SUPPORT_SYSTEMD
1f016a
-        return systemd_unit_exec(op, FALSE);
1f016a
+        return systemd_unit_exec(op);
1f016a
 #endif
1f016a
     }
1f016a
     return services_os_action_execute(op, FALSE);
1f016a
@@ -502,6 +503,7 @@ services_action_sync(svc_action_t * op)
1f016a
 {
1f016a
     gboolean rc = TRUE;
1f016a
 
1f016a
+    op->synchronous = true;
1f016a
     if (op == NULL) {
1f016a
         crm_trace("No operation to execute");
1f016a
         return FALSE;
1f016a
@@ -512,7 +514,7 @@ services_action_sync(svc_action_t * op)
1f016a
 #endif
1f016a
     } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) {
1f016a
 #if SUPPORT_SYSTEMD
1f016a
-        rc = systemd_unit_exec(op, TRUE);
1f016a
+        rc = systemd_unit_exec(op);
1f016a
 #endif
1f016a
     } else {
1f016a
         rc = services_os_action_execute(op, TRUE);
1f016a
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
1f016a
index e81d178..c967430 100644
1f016a
--- a/lib/services/systemd.c
1f016a
+++ b/lib/services/systemd.c
1f016a
@@ -35,6 +35,9 @@
1f016a
 /*
1f016a
    /usr/share/dbus-1/interfaces/org.freedesktop.systemd1.Manager.xml
1f016a
 */
1f016a
+gboolean
1f016a
+systemd_unit_exec_with_unit(svc_action_t * op, const char *unit);
1f016a
+
1f016a
 
1f016a
 struct unit_info {
1f016a
     const char *id;
1f016a
@@ -49,6 +52,15 @@ struct unit_info {
1f016a
     const char *job_path;
1f016a
 };
1f016a
 
1f016a
+struct pcmk_dbus_data 
1f016a
+{
1f016a
+        char *name;
1f016a
+        char *unit;
1f016a
+        DBusError error;
1f016a
+        svc_action_t *op;
1f016a
+        void (*callback)(DBusMessage *reply, svc_action_t *op);
1f016a
+};
1f016a
+
1f016a
 static DBusMessage *systemd_new_method(const char *iface, const char *method)
1f016a
 {
1f016a
     crm_trace("Calling: %s on %s", method, iface);
1f016a
@@ -101,6 +113,7 @@ systemd_service_name(const char *name)
1f016a
 static bool
1f016a
 systemd_daemon_reload(void)
1f016a
 {
1f016a
+    /* TODO: Make this asynchronous */
1f016a
     const char *method = "Reload";
1f016a
     DBusMessage *reply = NULL;
1f016a
     DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);
1f016a
@@ -114,21 +127,55 @@ systemd_daemon_reload(void)
1f016a
     return TRUE;
1f016a
 }
1f016a
 
1f016a
-static gboolean
1f016a
-systemd_unit_by_name(const gchar * arg_name, gchar ** out_unit)
1f016a
+static const char *
1f016a
+systemd_loadunit_result(DBusMessage *reply, svc_action_t * op)
1f016a
+{
1f016a
+    const char *path = NULL;
1f016a
+
1f016a
+    if(pcmk_dbus_find_error("LoadUnit", (void*)&path, reply, NULL)) {
1f016a
+        if(op) {
1f016a
+            crm_warn("No unit found for %s", op->rsc);
1f016a
+        }
1f016a
+
1f016a
+    } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
1f016a
+        dbus_message_get_args (reply, NULL,
1f016a
+                               DBUS_TYPE_OBJECT_PATH, &path,
1f016a
+                               DBUS_TYPE_INVALID);
1f016a
+    }
1f016a
+
1f016a
+    if(op) {
1f016a
+        systemd_unit_exec_with_unit(op, path);
1f016a
+    }
1f016a
+
1f016a
+    return path;
1f016a
+}
1f016a
+
1f016a
+
1f016a
+static void
1f016a
+systemd_loadunit_cb(DBusPendingCall *pending, void *user_data)
1f016a
+{
1f016a
+    DBusMessage *reply = NULL;
1f016a
+
1f016a
+    if(pending) {
1f016a
+        reply = dbus_pending_call_steal_reply(pending);
1f016a
+    }
1f016a
+
1f016a
+    systemd_loadunit_result(reply, user_data);
1f016a
+
1f016a
+    if(reply) {
1f016a
+        dbus_message_unref(reply);
1f016a
+    }
1f016a
+}
1f016a
+
1f016a
+static char *
1f016a
+systemd_unit_by_name(const gchar * arg_name, svc_action_t *op)
1f016a
 {
1f016a
     DBusMessage *msg;
1f016a
     DBusMessage *reply = NULL;
1f016a
-    const char *method = "GetUnit";
1f016a
     char *name = NULL;
1f016a
-    DBusError error;
1f016a
 
1f016a
 /*
1f016a
-  <method name="GetUnit">
1f016a
-   <arg name="name" type="s" direction="in"/>
1f016a
-   <arg name="unit" type="o" direction="out"/>
1f016a
-  </method>
1f016a
-
1f016a
+  Equivalent to GetUnit if it's already loaded
1f016a
   <method name="LoadUnit">
1f016a
    <arg name="name" type="s" direction="in"/>
1f016a
    <arg name="unit" type="o" direction="out"/>
1f016a
@@ -139,51 +186,34 @@ systemd_unit_by_name(const gchar * arg_name, gchar ** out_unit)
1f016a
         return FALSE;
1f016a
     }
1f016a
 
1f016a
-    name = systemd_service_name(arg_name);
1f016a
+    msg = systemd_new_method(BUS_NAME".Manager", "LoadUnit");
1f016a
+    CRM_ASSERT(msg != NULL);
1f016a
 
1f016a
-    while(TRUE) {
1f016a
-        msg = systemd_new_method(BUS_NAME".Manager", method);
1f016a
-        CRM_ASSERT(msg != NULL);
1f016a
+    name = systemd_service_name(arg_name);
1f016a
+    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
1f016a
+    free(name);
1f016a
 
1f016a
-        CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
1f016a
+    if(op == NULL || op->synchronous) {
1f016a
+        const char *unit = NULL;
1f016a
+        char *munit = NULL;
1f016a
+        DBusError error;
1f016a
 
1f016a
         dbus_error_init(&error);
1f016a
         reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
1f016a
         dbus_message_unref(msg);
1f016a
 
1f016a
-        if(error.name) {
1f016a
-            crm_info("Call to %s failed: %s", method, error.name);
1f016a
-
1f016a
-        } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
1f016a
-            if(out_unit) {
1f016a
-                char *path = NULL;
1f016a
-
1f016a
-                dbus_message_get_args (reply, NULL,
1f016a
-                                       DBUS_TYPE_OBJECT_PATH, &path,
1f016a
-                                       DBUS_TYPE_INVALID);
1f016a
-
1f016a
-                *out_unit = strdup(path);
1f016a
-            }
1f016a
-            dbus_message_unref(reply);
1f016a
-            free(name);
1f016a
-            return TRUE;
1f016a
+        unit = systemd_loadunit_result(reply, op);
1f016a
+        if(unit) {
1f016a
+            munit = strdup(unit);
1f016a
         }
1f016a
-
1f016a
-        if(strcmp(method, "LoadUnit") != 0) {
1f016a
-            method = "LoadUnit";
1f016a
-            crm_debug("Cannot find %s, reloading the systemd manager configuration", name);
1f016a
-            systemd_daemon_reload();
1f016a
-            if(reply) {
1f016a
-                dbus_message_unref(reply);
1f016a
-                reply = NULL;
1f016a
-            }
1f016a
-
1f016a
-        } else {
1f016a
-            free(name);
1f016a
-            return FALSE;
1f016a
+        if(reply) {
1f016a
+            dbus_message_unref(reply);
1f016a
         }
1f016a
+        return munit;
1f016a
     }
1f016a
-    return FALSE;
1f016a
+
1f016a
+    pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op);
1f016a
+    return NULL;
1f016a
 }
1f016a
 
1f016a
 GList *
1f016a
@@ -220,6 +250,10 @@ systemd_unit_listall(void)
1f016a
         crm_err("Call to %s failed: %s", method, error.name);
1f016a
         return NULL;
1f016a
 
1f016a
+    } else if (reply == NULL) {
1f016a
+        crm_err("Call to %s failed: Message has no reply", method);
1f016a
+        return NULL;
1f016a
+
1f016a
     } else if (!dbus_message_iter_init(reply, &args)) {
1f016a
         crm_err("Call to %s failed: Message has no arguments", method);
1f016a
         dbus_message_unref(reply);
1f016a
@@ -269,21 +303,27 @@ systemd_unit_listall(void)
1f016a
 gboolean
1f016a
 systemd_unit_exists(const char *name)
1f016a
 {
1f016a
-    return systemd_unit_by_name(name, NULL);
1f016a
+    /* Note: Makes blocking dbus calls
1f016a
+     * Used by resources_find_service_class() when resource class=service
1f016a
+     */
1f016a
+    if(systemd_unit_by_name(name, NULL)) {
1f016a
+        return TRUE;
1f016a
+    }
1f016a
+    return FALSE;
1f016a
 }
1f016a
 
1f016a
 static char *
1f016a
 systemd_unit_metadata(const char *name)
1f016a
 {
1f016a
-    char *path = NULL;
1f016a
     char *meta = NULL;
1f016a
     char *desc = NULL;
1f016a
+    char *path = systemd_unit_by_name(name, NULL);
1f016a
 
1f016a
-    if (systemd_unit_by_name(name, &path)) {
1f016a
-        CRM_ASSERT(path);
1f016a
-        desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description");
1f016a
+    if (path) {
1f016a
+        /* TODO: Worth making a blocking call for? Probably not. Possibly if cached. */
1f016a
+        desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description", NULL, NULL);
1f016a
     } else {
1f016a
-        desc = g_strdup_printf("systemd unit file for %s", name);
1f016a
+        desc = g_strdup_printf("Systemd unit file for %s", name);
1f016a
     }
1f016a
 
1f016a
     meta = g_strdup_printf("\n"
1f016a
@@ -335,24 +375,15 @@ systemd_mask_error(svc_action_t *op, const char *error)
1f016a
 }
1f016a
 
1f016a
 static void
1f016a
-systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
1f016a
+systemd_exec_result(DBusMessage *reply, svc_action_t *op)
1f016a
 {
1f016a
     DBusError error;
1f016a
-    DBusMessage *reply = NULL;
1f016a
-    svc_action_t *op = user_data;
1f016a
 
1f016a
-    dbus_error_init(&error);
1f016a
-    if(pending) {
1f016a
-        reply = dbus_pending_call_steal_reply(pending);
1f016a
-    }
1f016a
-    if(reply == NULL) {
1f016a
-        crm_err("No reply for %s action on %s", op->action, op->rsc);
1f016a
-
1f016a
-    } else if(pcmk_dbus_find_error(op->action, pending, reply, &error)) {
1f016a
+    if(pcmk_dbus_find_error(op->action, (void*)&error, reply, &error)) {
1f016a
 
1f016a
         /* ignore "already started" or "not running" errors */
1f016a
         if (!systemd_mask_error(op, error.name)) {
1f016a
-            crm_err("%s for %s: %s", op->action, op->rsc, error.message);
1f016a
+            crm_err("Could not issue %s for %s: %s (%s)", op->action, op->rsc, error.message);
1f016a
         }
1f016a
 
1f016a
     } else {
1f016a
@@ -372,6 +403,21 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
1f016a
     }
1f016a
 
1f016a
     operation_finalize(op);
1f016a
+}
1f016a
+
1f016a
+static void
1f016a
+systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
1f016a
+{
1f016a
+    DBusError error;
1f016a
+    DBusMessage *reply = NULL;
1f016a
+    svc_action_t *op = user_data;
1f016a
+
1f016a
+    dbus_error_init(&error);
1f016a
+    if(pending) {
1f016a
+        reply = dbus_pending_call_steal_reply(pending);
1f016a
+    }
1f016a
+
1f016a
+    systemd_exec_result(reply, op);
1f016a
 
1f016a
     if(pending) {
1f016a
         dbus_pending_call_unref(pending);
1f016a
@@ -383,61 +429,56 @@ systemd_async_dispatch(DBusPendingCall *pending, void *user_data)
1f016a
 
1f016a
 #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/"
1f016a
 
1f016a
+static void
1f016a
+systemd_unit_check(const char *name, const char *state, void *userdata)
1f016a
+{
1f016a
+    svc_action_t * op = userdata;
1f016a
+    
1f016a
+    CRM_ASSERT(state != NULL);
1f016a
+
1f016a
+    if (g_strcmp0(state, "active") == 0) {
1f016a
+        op->rc = PCMK_OCF_OK;
1f016a
+    } else if (g_strcmp0(state, "activating") == 0) {
1f016a
+        op->rc = PCMK_OCF_PENDING;
1f016a
+    } else {
1f016a
+        op->rc = PCMK_OCF_NOT_RUNNING;
1f016a
+    }
1f016a
+
1f016a
+    if (op->synchronous == FALSE) {
1f016a
+        operation_finalize(op);
1f016a
+    }
1f016a
+}
1f016a
+
1f016a
 gboolean
1f016a
-systemd_unit_exec(svc_action_t * op, gboolean synchronous)
1f016a
+systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
1f016a
 {
1f016a
-    DBusError error;
1f016a
-    char *unit = NULL;
1f016a
-    const char *replace_s = "replace";
1f016a
-    gboolean pass = FALSE;
1f016a
     const char *method = op->action;
1f016a
-    char *name = systemd_service_name(op->agent);
1f016a
     DBusMessage *msg = NULL;
1f016a
     DBusMessage *reply = NULL;
1f016a
 
1f016a
-    dbus_error_init(&error);
1f016a
-    op->rc = PCMK_OCF_UNKNOWN_ERROR;
1f016a
-    CRM_ASSERT(systemd_init());
1f016a
-
1f016a
-    crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
1f016a
-              synchronous ? "" : "a", op->action, op->agent, op->rsc);
1f016a
-
1f016a
-    if (safe_str_eq(op->action, "meta-data")) {
1f016a
-        op->stdout_data = systemd_unit_metadata(op->agent);
1f016a
-        op->rc = PCMK_OCF_OK;
1f016a
-        goto cleanup;
1f016a
-    }
1f016a
+    CRM_ASSERT(unit);
1f016a
 
1f016a
-    pass = systemd_unit_by_name(op->agent, &unit);
1f016a
-    if (pass == FALSE) {
1f016a
+    if (unit == NULL) {
1f016a
         crm_debug("Could not obtain unit named '%s'", op->agent);
1f016a
-#if 0
1f016a
-        if (error && strstr(error->message, "systemd1.NoSuchUnit")) {
1f016a
-            op->rc = PCMK_OCF_NOT_INSTALLED;
1f016a
-            op->status = PCMK_LRM_OP_NOT_INSTALLED;
1f016a
-        }
1f016a
-#endif
1f016a
+        op->rc = PCMK_OCF_NOT_INSTALLED;
1f016a
+        op->status = PCMK_LRM_OP_NOT_INSTALLED;
1f016a
         goto cleanup;
1f016a
     }
1f016a
 
1f016a
     if (safe_str_eq(op->action, "monitor") || safe_str_eq(method, "status")) {
1f016a
-        char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState");
1f016a
-
1f016a
-        if (g_strcmp0(state, "active") == 0) {
1f016a
-            op->rc = PCMK_OCF_OK;
1f016a
-        } else if (g_strcmp0(state, "activating") == 0) {
1f016a
-            op->rc = PCMK_OCF_PENDING;
1f016a
-        } else {
1f016a
-            op->rc = PCMK_OCF_NOT_RUNNING;
1f016a
+        char *state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState",
1f016a
+                                             op->synchronous?NULL:systemd_unit_check, op);
1f016a
+        if (op->synchronous) {
1f016a
+            systemd_unit_check("ActiveState", state, op);
1f016a
+            free(state);
1f016a
+            return op->rc == PCMK_OCF_OK;
1f016a
         }
1f016a
-
1f016a
-        free(state);
1f016a
-        goto cleanup;
1f016a
+        return TRUE;
1f016a
 
1f016a
     } else if (g_strcmp0(method, "start") == 0) {
1f016a
         FILE *file_strm = NULL;
1f016a
         char *override_dir = g_strdup_printf("%s/%s", SYSTEMD_OVERRIDE_ROOT, unit);
1f016a
-        char *override_file = g_strdup_printf("%s/50-pacemaker.conf", override_dir);
1f016a
+        char *override_file = g_strdup_printf("%s/%s/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, unit);
1f016a
 
1f016a
         method = "StartUnit";
1f016a
         crm_build_path(override_dir, 0755);
1f016a
@@ -446,11 +487,11 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
1f016a
         if (file_strm != NULL) {
1f016a
             int rc = fprintf(file_strm, "[Service]\nRestart=no");
1f016a
             if (rc < 0) {
1f016a
-                crm_perror(LOG_ERR, "Cannot write to systemd override file %s: %s (%d)", override_file, pcmk_strerror(errno), errno);
1f016a
+                crm_perror(LOG_ERR, "Cannot write to systemd override file %s", override_file);
1f016a
             }
1f016a
 
1f016a
         } else {
1f016a
-            crm_err("Cannot open systemd override file %s for writing: %s (%d)", override_file, pcmk_strerror(errno), errno);
1f016a
+            crm_err("Cannot open systemd override file %s for writing", override_file);
1f016a
         }
1f016a
 
1f016a
         if (file_strm != NULL) {
1f016a
@@ -471,6 +512,7 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
1f016a
 
1f016a
     } else if (g_strcmp0(method, "restart") == 0) {
1f016a
         method = "RestartUnit";
1f016a
+
1f016a
     } else {
1f016a
         op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
1f016a
         goto cleanup;
1f016a
@@ -482,54 +524,66 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
1f016a
     CRM_ASSERT(msg != NULL);
1f016a
 
1f016a
     /* (ss) */
1f016a
-    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
1f016a
-    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
1f016a
+    {
1f016a
+        const char *replace_s = "replace";
1f016a
+        char *name = systemd_service_name(op->agent);
1f016a
+
1f016a
+        CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
1f016a
+        CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
1f016a
 
1f016a
-    if (synchronous == FALSE) {
1f016a
-        free(unit);
1f016a
         free(name);
1f016a
-        return pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
1f016a
     }
1f016a
 
1f016a
-    dbus_error_init(&error);
1f016a
-    reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
1f016a
-
1f016a
-    if(error.name) {
1f016a
-        /* ignore "already started" or "not running" errors */
1f016a
-        if(!systemd_mask_error(op, error.name)) {
1f016a
-            crm_err("Could not issue %s for %s: %s (%s)", method, op->rsc, error.name, unit);
1f016a
-        }
1f016a
-        goto cleanup;
1f016a
-
1f016a
-    } else if(!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
1f016a
-        crm_warn("Call to %s passed but return type was unexpected", op->action);
1f016a
-        op->rc = PCMK_OCF_OK;
1f016a
+    if (op->synchronous == FALSE) {
1f016a
+        return pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op);
1f016a
 
1f016a
     } else {
1f016a
-        const char *path = NULL;
1f016a
+        DBusError error;
1f016a
 
1f016a
-        dbus_message_get_args (reply, NULL,
1f016a
-                               DBUS_TYPE_OBJECT_PATH, &path,
1f016a
-                               DBUS_TYPE_INVALID);
1f016a
-        crm_info("Call to %s passed: %s", op->action, path);
1f016a
-        op->rc = PCMK_OCF_OK;
1f016a
+        reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
1f016a
+        systemd_exec_result(reply, op);
1f016a
+        if(reply) {
1f016a
+            dbus_message_unref(reply);
1f016a
+        }
1f016a
     }
1f016a
 
1f016a
-  cleanup:
1f016a
-    free(unit);
1f016a
-    free(name);
1f016a
-
1f016a
     if(msg) {
1f016a
         dbus_message_unref(msg);
1f016a
     }
1f016a
 
1f016a
-    if(reply) {
1f016a
-        dbus_message_unref(reply);
1f016a
+  cleanup:
1f016a
+    if (op->synchronous == FALSE) {
1f016a
+        operation_finalize(op);
1f016a
+        return TRUE;
1f016a
     }
1f016a
 
1f016a
-    if (synchronous == FALSE) {
1f016a
-        operation_finalize(op);
1f016a
+    return op->rc == PCMK_OCF_OK;
1f016a
+}
1f016a
+
1f016a
+gboolean
1f016a
+systemd_unit_exec(svc_action_t * op)
1f016a
+{
1f016a
+    CRM_ASSERT(op);
1f016a
+    CRM_ASSERT(systemd_init());
1f016a
+    op->rc = PCMK_OCF_UNKNOWN_ERROR;
1f016a
+    crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
1f016a
+              op->synchronous ? "" : "a", op->action, op->agent, op->rsc);
1f016a
+
1f016a
+    if (safe_str_eq(op->action, "meta-data")) {
1f016a
+        /* TODO: See if we can teach the lrmd not to make these calls synchronously */
1f016a
+        op->stdout_data = systemd_unit_metadata(op->agent);
1f016a
+        op->rc = PCMK_OCF_OK;
1f016a
+
1f016a
+        if (op->synchronous == FALSE) {
1f016a
+            operation_finalize(op);
1f016a
+        }
1f016a
         return TRUE;
1f016a
     }
1f016a
+
1f016a
+    systemd_unit_by_name(op->agent, op);
1f016a
+    if (op->synchronous == FALSE) {
1f016a
+        return TRUE;
1f016a
+    }
1f016a
+
1f016a
     return op->rc == PCMK_OCF_OK;
1f016a
 }
1f016a
diff --git a/lib/services/systemd.h b/lib/services/systemd.h
1f016a
index 6e1b80b..c86bafe 100644
1f016a
--- a/lib/services/systemd.h
1f016a
+++ b/lib/services/systemd.h
1f016a
@@ -17,7 +17,7 @@
1f016a
  */
1f016a
 
1f016a
 G_GNUC_INTERNAL GList *systemd_unit_listall(void);
1f016a
-G_GNUC_INTERNAL int systemd_unit_exec(svc_action_t * op, gboolean synchronous);
1f016a
+G_GNUC_INTERNAL int systemd_unit_exec(svc_action_t * op);
1f016a
 G_GNUC_INTERNAL gboolean systemd_unit_exists(const gchar * name);
1f016a
 G_GNUC_INTERNAL gboolean systemd_unit_running(const gchar * name);
1f016a
 G_GNUC_INTERNAL void systemd_cleanup(void);
1f016a
diff --git a/lib/services/upstart.c b/lib/services/upstart.c
1f016a
index f47e8ff..4c7211d 100644
1f016a
--- a/lib/services/upstart.c
1f016a
+++ b/lib/services/upstart.c
1f016a
@@ -275,6 +275,10 @@ get_first_instance(const gchar * job)
1f016a
         crm_err("Call to %s failed: %s", method, error.name);
1f016a
         goto done;
1f016a
 
1f016a
+    } else if(reply == NULL) {
1f016a
+        crm_err("Call to %s failed: no reply", method);
1f016a
+        goto done;
1f016a
+
1f016a
     } else if (!dbus_message_iter_init(reply, &args)) {
1f016a
         crm_err("Call to %s failed: Message has no arguments", method);
1f016a
         goto done;
1f016a
@@ -304,31 +308,22 @@ get_first_instance(const gchar * job)
1f016a
     return instance;
1f016a
 }
1f016a
 
1f016a
-gboolean
1f016a
-upstart_job_running(const gchar * name)
1f016a
+static void
1f016a
+upstart_job_check(const char *name, const char *state, void *userdata)
1f016a
 {
1f016a
-    bool running = FALSE;
1f016a
-    char *job = NULL;
1f016a
-
1f016a
-    if(upstart_job_by_name(name, &job)) {
1f016a
-        char *path = get_first_instance(job);
1f016a
+    svc_action_t * op = userdata;
1f016a
 
1f016a
-        if (path) {
1f016a
-            char *state = pcmk_dbus_get_property(
1f016a
-                upstart_proxy, BUS_NAME, path, UPSTART_06_API ".Instance", "state");
1f016a
-
1f016a
-            crm_info("State of %s: %s", name, state);
1f016a
-            if (state) {
1f016a
-                running = !g_strcmp0(state, "running");
1f016a
-            }
1f016a
-            free(state);
1f016a
-        }
1f016a
-        free(path);
1f016a
+    if (state && g_strcmp0(state, "running") == 0) {
1f016a
+        op->rc = PCMK_OCF_OK;
1f016a
+    /* } else if (g_strcmp0(state, "activating") == 0) { */
1f016a
+    /*     op->rc = PCMK_OCF_PENDING; */
1f016a
+    } else {
1f016a
+        op->rc = PCMK_OCF_NOT_RUNNING;
1f016a
     }
1f016a
 
1f016a
-    free(job);
1f016a
-    crm_info("%s is%s running", name, running ? "" : " not");
1f016a
-    return running;
1f016a
+    if (op->synchronous == FALSE) {
1f016a
+        operation_finalize(op);
1f016a
+    }
1f016a
 }
1f016a
 
1f016a
 static char *
1f016a
@@ -465,10 +460,24 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
1f016a
     }
1f016a
 
1f016a
     if (safe_str_eq(op->action, "monitor") || safe_str_eq(action, "status")) {
1f016a
-        if (upstart_job_running(op->agent)) {
1f016a
-            op->rc = PCMK_OCF_OK;
1f016a
-        } else {
1f016a
-            op->rc = PCMK_OCF_NOT_RUNNING;
1f016a
+
1f016a
+        char *path = get_first_instance(job);
1f016a
+
1f016a
+        op->rc = PCMK_OCF_NOT_RUNNING;
1f016a
+        if(path) {
1f016a
+            char *state = pcmk_dbus_get_property(
1f016a
+                upstart_proxy, BUS_NAME, path, UPSTART_06_API ".Instance", "state",
1f016a
+                op->synchronous?NULL:upstart_job_check, op);
1f016a
+
1f016a
+            free(job);
1f016a
+            free(path);
1f016a
+
1f016a
+            if(op->synchronous) {
1f016a
+                upstart_job_check("state", state, op);
1f016a
+                free(state);
1f016a
+                return op->rc == PCMK_OCF_OK;
1f016a
+            }
1f016a
+            return TRUE;
1f016a
         }
1f016a
         goto cleanup;
1f016a
 
1f016a
@@ -503,7 +512,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
1f016a
 
1f016a
     CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_BOOLEAN, &arg_wait, DBUS_TYPE_INVALID));
1f016a
 
1f016a
-    if (synchronous == FALSE) {
1f016a
+    if (op->synchronous == FALSE) {
1f016a
         free(job);
1f016a
         return pcmk_dbus_send(msg, upstart_proxy, upstart_async_dispatch, op);
1f016a
     }
1f016a
@@ -545,7 +554,7 @@ upstart_job_exec(svc_action_t * op, gboolean synchronous)
1f016a
         dbus_message_unref(reply);
1f016a
     }
1f016a
 
1f016a
-    if (synchronous == FALSE) {
1f016a
+    if (op->synchronous == FALSE) {
1f016a
         operation_finalize(op);
1f016a
         return TRUE;
1f016a
     }
1f016a
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
1f016a
index f3abfdb..7075b9f 100644
1f016a
--- a/lrmd/lrmd.c
1f016a
+++ b/lrmd/lrmd.c
1f016a
@@ -874,6 +874,12 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
1f016a
     if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
1f016a
         recurring = 0;
1f016a
         /* do nothing */
1f016a
+
1f016a
+    } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) {
1f016a
+        /* Not registered == inactive */
1f016a
+        cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
1f016a
+        cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
1f016a
+
1f016a
     } else if (rc) {
1f016a
         /* Attempt to map return codes to op status if possible */
1f016a
         switch (rc) {
1f016a
@@ -884,6 +890,7 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
1f016a
                 cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
1f016a
                 break;
1f016a
             default:
1f016a
+                /* TODO: This looks wrong.  Status should be _DONE and exec_rc set to an error */
1f016a
                 cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
1f016a
         }
1f016a
     } else {
1f016a
diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in
1f016a
index b6b6718..a9a32ef 100755
1f016a
--- a/lrmd/regression.py.in
1f016a
+++ b/lrmd/regression.py.in
1f016a
@@ -240,6 +240,13 @@ class Tests:
1f016a
                 self.action_timeout = " -t 5000 "
1f016a
 		if self.tls:
1f016a
 			self.rsc_classes.remove("stonith")
1f016a
+		if "systemd" in self.rsc_classes:
1f016a
+			# The lrmd_dummy_daemon requires the systemd.daemon module. Import it
1f016a
+			# here only to guarantee it is installed before this script is
1f016a
+			# allowed to run. Without the module, every systemd test would look
1f016a
+			# like a failure, which is alarming and misleading; it is better
1f016a
+			# to fail immediately on the import.
1f016a
+			import systemd.daemon
1f016a
 
1f016a
 		print "Testing "+repr(self.rsc_classes)
1f016a
 
1f016a
diff --git a/mcp/pacemaker.combined.upstart.in b/mcp/pacemaker.combined.upstart.in
1f016a
index 9540019..6301d10 100644
1f016a
--- a/mcp/pacemaker.combined.upstart.in
1f016a
+++ b/mcp/pacemaker.combined.upstart.in
1f016a
@@ -30,6 +30,9 @@ pre-start script
1f016a
     # give it time to fail.
1f016a
     sleep 2
1f016a
     pidof corosync || { exit 1; }
1f016a
+
1f016a
+    # if you use crm_mon, uncomment the line below.
1f016a
+    #start crm_mon
1f016a
 end script
1f016a
 
1f016a
 post-start script
1f016a
@@ -59,6 +62,9 @@ post-stop script
1f016a
     # and invalidate above "respawn" stanza.
1f016a
     #pidof crmd && killall -q -9 corosync
1f016a
 
1f016a
+    # if you use crm_mon, uncomment the line below.
1f016a
+    #stop crm_mon
1f016a
+
1f016a
     # if you use corosync-notifyd, uncomment the line below.
1f016a
     #stop corosync-notifyd || true
1f016a
 end script
1f016a
diff --git a/pacemaker.spec.in b/pacemaker.spec.in
1f016a
index bee6bfc..597fb3a 100644
1f016a
--- a/pacemaker.spec.in
1f016a
+++ b/pacemaker.spec.in
1f016a
@@ -283,11 +283,13 @@ make DESTDIR=%{buildroot} docdir=%{pcmk_docdir} V=1 install
1f016a
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig
1f016a
 mkdir -p ${RPM_BUILD_ROOT}%{_var}/lib/pacemaker/cores
1f016a
 install -m 644 mcp/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker
1f016a
+install -m 644 tools/crm_mon.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/crm_mon
1f016a
 
1f016a
 %if %{with upstart_job}
1f016a
 mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init
1f016a
 install -m 644 mcp/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf
1f016a
 install -m 644 mcp/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf
1f016a
+install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf
1f016a
 %endif
1f016a
 
1f016a
 # Scripts that should be executable
1f016a
@@ -395,6 +397,7 @@ exit 0
1f016a
 %exclude %{_datadir}/pacemaker/tests
1f016a
 
1f016a
 %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
1f016a
+%config(noreplace) %{_sysconfdir}/sysconfig/crm_mon
1f016a
 %config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker
1f016a
 %{_sbindir}/pacemakerd
1f016a
 
1f016a
@@ -451,6 +454,7 @@ exit 0
1f016a
 %if %{with upstart_job}
1f016a
 %config(noreplace) %{_sysconfdir}/init/pacemaker.conf
1f016a
 %config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf
1f016a
+%config(noreplace) %{_sysconfdir}/init/crm_mon.conf
1f016a
 %endif
1f016a
 
1f016a
 %files cli
1f016a
diff --git a/pengine/allocate.c b/pengine/allocate.c
1f016a
index f9f9f3c..8d02d9b 100644
1f016a
--- a/pengine/allocate.c
1f016a
+++ b/pengine/allocate.c
1f016a
@@ -1680,16 +1680,41 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
1f016a
                 action,
1f016a
                 pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
1f016a
                 data_set);
1f016a
-
1f016a
         } else if (safe_str_eq(action->task, "stop")) {
1f016a
-            custom_action_order(action->rsc,
1f016a
-                NULL,
1f016a
-                action,
1f016a
-                remote_rsc,
1f016a
-                generate_op_key(remote_rsc->id, RSC_STOP, 0),
1f016a
-                NULL,
1f016a
-                pe_order_preserve | pe_order_implies_first,
1f016a
-                data_set);
1f016a
+            gboolean after_start = FALSE;
1f016a
+
1f016a
+            /* handle special case with baremetal remote where stop actions need to be
1f016a
+             * ordered after the connection resource starts somewhere else. */
1f016a
+            if (is_baremetal_remote_node(action->node)) {
1f016a
+                node_t *cluster_node = remote_rsc->running_on ? remote_rsc->running_on->data : NULL;
1f016a
+
1f016a
+                /* If the cluster node currently hosting the baremetal connection
1f016a
+                 * resource is unclean, we can't process any operations on that
1f016a
+                 * remote node until the connection resource starts somewhere else. */
1f016a
+                if (cluster_node && cluster_node->details->unclean == TRUE) {
1f016a
+                    after_start = TRUE;
1f016a
+                }
1f016a
+            }
1f016a
+
1f016a
+            if (after_start) {
1f016a
+                custom_action_order(remote_rsc,
1f016a
+                    generate_op_key(remote_rsc->id, RSC_START, 0),
1f016a
+                    NULL,
1f016a
+                    action->rsc,
1f016a
+                    NULL,
1f016a
+                    action,
1f016a
+                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left,
1f016a
+                    data_set);
1f016a
+            } else {
1f016a
+                custom_action_order(action->rsc,
1f016a
+                    NULL,
1f016a
+                    action,
1f016a
+                    remote_rsc,
1f016a
+                    generate_op_key(remote_rsc->id, RSC_STOP, 0),
1f016a
+                    NULL,
1f016a
+                    pe_order_preserve | pe_order_implies_first,
1f016a
+                    data_set);
1f016a
+            }
1f016a
         }
1f016a
     }
1f016a
 }
1f016a
diff --git a/pengine/regression.sh b/pengine/regression.sh
1f016a
index 5f98215..bdc7d3a 100755
1f016a
--- a/pengine/regression.sh
1f016a
+++ b/pengine/regression.sh
1f016a
@@ -762,9 +762,11 @@ echo ""
1f016a
 do_test remote-startup-probes  "Baremetal remote-node startup probes"
1f016a
 do_test remote-startup         "Startup a newly discovered remote-nodes with no status."
1f016a
 do_test remote-fence-unclean   "Fence unclean baremetal remote-node"
1f016a
+do_test remote-fence-unclean2  "Fence baremetal remote-node after cluster node fails and connection can not be recovered"
1f016a
 do_test remote-move            "Move remote-node connection resource"
1f016a
 do_test remote-disable         "Disable a baremetal remote-node"
1f016a
 do_test remote-orphaned        "Properly shutdown orphaned connection resource"
1f016a
+do_test remote-recover         "Recover connection resource after cluster-node fails."
1f016a
 do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
1f016a
 echo ""
1f016a
 test_results
1f016a
diff --git a/pengine/test10/remote-fence-unclean2.dot b/pengine/test10/remote-fence-unclean2.dot
1f016a
new file mode 100644
1f016a
index 0000000..6cff564
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-fence-unclean2.dot
1f016a
@@ -0,0 +1,10 @@
1f016a
+digraph "g" {
1f016a
+"all_stopped" [ style=bold color="green" fontcolor="orange"]
1f016a
+"fake_stop_0 rhel7-alt4" -> "all_stopped" [ style = bold]
1f016a
+"fake_stop_0 rhel7-alt4" [ style=bold color="green" fontcolor="orange"]
1f016a
+"stonith 'reboot' rhel7-alt4" -> "fake_stop_0 rhel7-alt4" [ style = bold]
1f016a
+"stonith 'reboot' rhel7-alt4" -> "stonith_complete" [ style = bold]
1f016a
+"stonith 'reboot' rhel7-alt4" [ style=bold color="green" fontcolor="black"]
1f016a
+"stonith_complete" -> "all_stopped" [ style = bold]
1f016a
+"stonith_complete" [ style=bold color="green" fontcolor="orange"]
1f016a
+}
1f016a
diff --git a/pengine/test10/remote-fence-unclean2.exp b/pengine/test10/remote-fence-unclean2.exp
1f016a
new file mode 100644
1f016a
index 0000000..e58b617
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-fence-unclean2.exp
1f016a
@@ -0,0 +1,49 @@
1f016a
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
1f016a
+  <synapse id="0">
1f016a
+    <action_set>
1f016a
+      <pseudo_event id="6" operation="stop" operation_key="fake_stop_0">
1f016a
+        <attributes CRM_meta_name="stop" CRM_meta_timeout="20000" />
1f016a
+      </pseudo_event>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+  <synapse id="1">
1f016a
+    <action_set>
1f016a
+      <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4">
1f016a
+        <attributes CRM_meta_last_failure_fake="1411503989" CRM_meta_on_node="rhel7-alt4" CRM_meta_on_node_uuid="rhel7-alt4" CRM_meta_probe_complete="true" CRM_meta_stonith_action="reboot" />
1f016a
+      </crm_event>
1f016a
+    </action_set>
1f016a
+    <inputs/>
1f016a
+  </synapse>
1f016a
+  <synapse id="2">
1f016a
+    <action_set>
1f016a
+      <pseudo_event id="7" operation="stonith_complete" operation_key="stonith_complete">
1f016a
+        <attributes />
1f016a
+      </pseudo_event>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <crm_event id="8" operation="stonith" operation_key="stonith-rhel7-alt4-reboot" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+  <synapse id="3">
1f016a
+    <action_set>
1f016a
+      <pseudo_event id="1" operation="all_stopped" operation_key="all_stopped">
1f016a
+        <attributes />
1f016a
+      </pseudo_event>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <pseudo_event id="6" operation="stop" operation_key="fake_stop_0"/>
1f016a
+      </trigger>
1f016a
+      <trigger>
1f016a
+        <pseudo_event id="7" operation="stonith_complete" operation_key="stonith_complete"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+</transition_graph>
1f016a
diff --git a/pengine/test10/remote-fence-unclean2.scores b/pengine/test10/remote-fence-unclean2.scores
1f016a
new file mode 100644
1f016a
index 0000000..10fc7fd
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-fence-unclean2.scores
1f016a
@@ -0,0 +1,13 @@
1f016a
+Allocation scores:
1f016a
+native_color: fake allocation score on rhel7-alt1: 0
1f016a
+native_color: fake allocation score on rhel7-alt2: 0
1f016a
+native_color: fake allocation score on rhel7-alt3: 0
1f016a
+native_color: fake allocation score on rhel7-alt4: INFINITY
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt1: 0
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt2: 0
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt3: 0
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt4: -INFINITY
1f016a
+native_color: shooter allocation score on rhel7-alt1: 0
1f016a
+native_color: shooter allocation score on rhel7-alt2: 0
1f016a
+native_color: shooter allocation score on rhel7-alt3: 0
1f016a
+native_color: shooter allocation score on rhel7-alt4: -INFINITY
1f016a
diff --git a/pengine/test10/remote-fence-unclean2.summary b/pengine/test10/remote-fence-unclean2.summary
1f016a
new file mode 100644
1f016a
index 0000000..bfaf77b
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-fence-unclean2.summary
1f016a
@@ -0,0 +1,30 @@
1f016a
+
1f016a
+Current cluster status:
1f016a
+Node rhel7-alt1 (1): standby
1f016a
+Node rhel7-alt2 (2): standby
1f016a
+RemoteNode rhel7-alt4: UNCLEAN (offline)
1f016a
+OFFLINE: [ rhel7-alt3 ]
1f016a
+
1f016a
+ shooter	(stonith:fence_xvm):	Stopped 
1f016a
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
1f016a
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
1f016a
+
1f016a
+Transition Summary:
1f016a
+ * Stop    fake	(rhel7-alt4)
1f016a
+
1f016a
+Executing cluster transition:
1f016a
+ * Fencing rhel7-alt4 (reboot)
1f016a
+ * Pseudo action:   stonith_complete
1f016a
+ * Pseudo action:   fake_stop_0
1f016a
+ * Pseudo action:   all_stopped
1f016a
+
1f016a
+Revised cluster status:
1f016a
+Node rhel7-alt1 (1): standby
1f016a
+Node rhel7-alt2 (2): standby
1f016a
+OFFLINE: [ rhel7-alt3 ]
1f016a
+RemoteOFFLINE: [ rhel7-alt4 ]
1f016a
+
1f016a
+ shooter	(stonith:fence_xvm):	Stopped 
1f016a
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
1f016a
+ fake	(ocf::heartbeat:Dummy):	Stopped 
1f016a
+
1f016a
diff --git a/pengine/test10/remote-fence-unclean2.xml b/pengine/test10/remote-fence-unclean2.xml
1f016a
new file mode 100644
1f016a
index 0000000..78fc4f1
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-fence-unclean2.xml
1f016a
@@ -0,0 +1,115 @@
1f016a
+<cib crm_feature_set="3.0.9" validate-with="pacemaker-2.1" epoch="13" num_updates="8" admin_epoch="0" cib-last-written="Tue Sep 23 16:28:22 2014" have-quorum="1" dc-uuid="2">
1f016a
+  <configuration>
1f016a
+    <crm_config>
1f016a
+      <cluster_property_set id="cib-bootstrap-options">
1f016a
+        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.12-6da3f72"/>
1f016a
+        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
1f016a
+        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="phd"/>
1f016a
+        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1411504087"/>
1f016a
+      </cluster_property_set>
1f016a
+    </crm_config>
1f016a
+    <nodes>
1f016a
+      <node id="1" uname="rhel7-alt1">
1f016a
+        <instance_attributes id="nodes-1">
1f016a
+          <nvpair id="nodes-1-standby" name="standby" value="on"/>
1f016a
+        </instance_attributes>
1f016a
+      </node>
1f016a
+      <node id="2" uname="rhel7-alt2">
1f016a
+        <instance_attributes id="nodes-2">
1f016a
+          <nvpair id="nodes-2-standby" name="standby" value="on"/>
1f016a
+        </instance_attributes>
1f016a
+      </node>
1f016a
+      <node id="3" uname="rhel7-alt3"/>
1f016a
+    </nodes>
1f016a
+    <resources>
1f016a
+      <primitive class="stonith" id="shooter" type="fence_xvm">
1f016a
+        <instance_attributes id="shooter-instance_attributes"/>
1f016a
+        <operations>
1f016a
+          <op id="shooter-monitor-interval-60s" interval="60s" name="monitor"/>
1f016a
+        </operations>
1f016a
+      </primitive>
1f016a
+      <primitive class="ocf" id="rhel7-alt4" provider="pacemaker" type="remote">
1f016a
+        <instance_attributes id="rhel7-alt4-instance_attributes"/>
1f016a
+        <operations>
1f016a
+          <op id="rhel7-alt4-start-timeout-15" interval="0s" name="start" timeout="15"/>
1f016a
+          <op id="rhel7-alt4-stop-timeout-15" interval="0s" name="stop" timeout="15"/>
1f016a
+          <op id="rhel7-alt4-monitor-timeout-15" interval="60s" name="monitor" timeout="15"/>
1f016a
+        </operations>
1f016a
+      </primitive>
1f016a
+      <primitive class="ocf" id="fake" provider="heartbeat" type="Dummy">
1f016a
+        <instance_attributes id="fake-instance_attributes"/>
1f016a
+        <operations>
1f016a
+          <op id="fake-start-timeout-20" interval="0s" name="start" timeout="20"/>
1f016a
+          <op id="fake-stop-timeout-20" interval="0s" name="stop" timeout="20"/>
1f016a
+          <op id="fake-monitor-interval-10" interval="10" name="monitor" timeout="20"/>
1f016a
+        </operations>
1f016a
+      </primitive>
1f016a
+    </resources>
1f016a
+    <constraints>
1f016a
+      <rsc_location id="location-fake-rhel7-alt4-INFINITY" node="rhel7-alt4" rsc="fake" score="INFINITY"/>
1f016a
+    </constraints>
1f016a
+  </configuration>
1f016a
+  <status>
1f016a
+    <node_state id="2" uname="rhel7-alt2" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
1f016a
+      <transient_attributes id="2">
1f016a
+        <instance_attributes id="status-2">
1f016a
+          <nvpair id="status-2-shutdown" name="shutdown" value="0"/>
1f016a
+          <nvpair id="status-2-probe_complete" name="probe_complete" value="true"/>
1f016a
+        </instance_attributes>
1f016a
+      </transient_attributes>
1f016a
+      <lrm id="2">
1f016a
+        <lrm_resources>
1f016a
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
1f016a
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="14" rc-code="0" op-status="0" interval="0" last-run="1411503701" last-rc-change="1411503701" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
1f016a
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
1f016a
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="7" op-status="0" interval="0" last-run="1411504086" last-rc-change="1411504086" exec-time="34" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+          </lrm_resource>
1f016a
+        </lrm_resources>
1f016a
+      </lrm>
1f016a
+    </node_state>
1f016a
+    <node_state id="1" uname="rhel7-alt1" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
1f016a
+      <lrm id="1">
1f016a
+        <lrm_resources>
1f016a
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
1f016a
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="0" op-status="0" interval="0" last-run="1411504102" last-rc-change="1411504102" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
1f016a
+            <lrm_rsc_op id="shooter_monitor_60000" operation_key="shooter_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="12" rc-code="0" op-status="0" interval="60000" last-rc-change="1411504079" exec-time="10" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt1"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
1f016a
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
1f016a
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+          </lrm_resource>
1f016a
+        </lrm_resources>
1f016a
+      </lrm>
1f016a
+      <transient_attributes id="1">
1f016a
+        <instance_attributes id="status-1">
1f016a
+          <nvpair id="status-1-shutdown" name="shutdown" value="0"/>
1f016a
+          <nvpair id="status-1-probe_complete" name="probe_complete" value="true"/>
1f016a
+        </instance_attributes>
1f016a
+      </transient_attributes>
1f016a
+    </node_state>
1f016a
+    <node_state id="3" uname="rhel7-alt3" in_ccm="false" crmd="offline" crm-debug-origin="send_stonith_update" join="down" expected="down"/>
1f016a
+    <node_state id="rhel7-alt4" remote_node="true" uname="rhel7-alt4" crm-debug-origin="post_cache_update">
1f016a
+      <lrm id="rhel7-alt4">
1f016a
+        <lrm_resources>
1f016a
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
1f016a
+            <lrm_rsc_op id="fake_last_failure_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt3" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+            <lrm_rsc_op id="fake_monitor_10000" operation_key="fake_monitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="35" rc-code="0" op-status="0" interval="10000" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt3"/>
1f016a
+          </lrm_resource>
1f016a
+        </lrm_resources>
1f016a
+      </lrm>
1f016a
+      <transient_attributes id="rhel7-alt4">
1f016a
+        <instance_attributes id="status-rhel7-alt4">
1f016a
+          <nvpair id="status-rhel7-alt4-probe_complete" name="probe_complete" value="true"/>
1f016a
+          <nvpair id="status-rhel7-alt4-last-failure-fake" name="last-failure-fake" value="1411503989"/>
1f016a
+        </instance_attributes>
1f016a
+      </transient_attributes>
1f016a
+    </node_state>
1f016a
+  </status>
1f016a
+</cib>
1f016a
diff --git a/pengine/test10/remote-recover.dot b/pengine/test10/remote-recover.dot
1f016a
new file mode 100644
1f016a
index 0000000..1da6a7b
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-recover.dot
1f016a
@@ -0,0 +1,17 @@
1f016a
+ digraph "g" {
1f016a
+"all_stopped" [ style=bold color="green" fontcolor="orange"]
1f016a
+"fake_monitor_10000 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
1f016a
+"fake_start_0 rhel7-alt4" -> "fake_monitor_10000 rhel7-alt4" [ style = bold]
1f016a
+"fake_start_0 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
1f016a
+"fake_stop_0 rhel7-alt4" -> "all_stopped" [ style = bold]
1f016a
+"fake_stop_0 rhel7-alt4" -> "fake_start_0 rhel7-alt4" [ style = bold]
1f016a
+"fake_stop_0 rhel7-alt4" [ style=bold color="green" fontcolor="black"]
1f016a
+"rhel7-alt4_monitor_60000 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
1f016a
+"rhel7-alt4_start_0 rhel7-alt1" -> "fake_monitor_10000 rhel7-alt4" [ style = bold]
1f016a
+"rhel7-alt4_start_0 rhel7-alt1" -> "fake_start_0 rhel7-alt4" [ style = bold]
1f016a
+"rhel7-alt4_start_0 rhel7-alt1" -> "rhel7-alt4_monitor_60000 rhel7-alt1" [ style = bold]
1f016a
+"rhel7-alt4_start_0 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
1f016a
+"shooter_monitor_60000 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
1f016a
+"shooter_start_0 rhel7-alt1" -> "shooter_monitor_60000 rhel7-alt1" [ style = bold]
1f016a
+"shooter_start_0 rhel7-alt1" [ style=bold color="green" fontcolor="black"]
1f016a
+}
1f016a
diff --git a/pengine/test10/remote-recover.exp b/pengine/test10/remote-recover.exp
1f016a
new file mode 100644
1f016a
index 0000000..37e4f71
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-recover.exp
1f016a
@@ -0,0 +1,99 @@
1f016a
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
1f016a
+  <synapse id="0">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="8" operation="monitor" operation_key="shooter_monitor_60000" on_node="rhel7-alt1" on_node_uuid="1">
1f016a
+        <primitive id="shooter" class="stonith" type="fence_xvm"/>
1f016a
+        <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="7" operation="start" operation_key="shooter_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+  <synapse id="1">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="7" operation="start" operation_key="shooter_start_0" on_node="rhel7-alt1" on_node_uuid="1">
1f016a
+        <primitive id="shooter" class="stonith" type="fence_xvm"/>
1f016a
+        <attributes CRM_meta_timeout="20000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs/>
1f016a
+  </synapse>
1f016a
+  <synapse id="2">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="10" operation="monitor" operation_key="rhel7-alt4_monitor_60000" on_node="rhel7-alt1" on_node_uuid="1">
1f016a
+        <primitive id="rhel7-alt4" class="ocf" provider="pacemaker" type="remote"/>
1f016a
+        <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="15000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+  <synapse id="3">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1">
1f016a
+        <primitive id="rhel7-alt4" class="ocf" provider="pacemaker" type="remote"/>
1f016a
+        <attributes CRM_meta_name="start" CRM_meta_timeout="15000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs/>
1f016a
+  </synapse>
1f016a
+  <synapse id="4">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="13" operation="monitor" operation_key="fake_monitor_10000" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
1f016a
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
1f016a
+        <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
1f016a
+      </trigger>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="12" operation="start" operation_key="fake_start_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+  <synapse id="5">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="12" operation="start" operation_key="fake_start_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
1f016a
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
1f016a
+        <attributes CRM_meta_name="start" CRM_meta_timeout="20000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="9" operation="start" operation_key="rhel7-alt4_start_0" on_node="rhel7-alt1" on_node_uuid="1"/>
1f016a
+      </trigger>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+  <synapse id="6">
1f016a
+    <action_set>
1f016a
+      <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1">
1f016a
+        <primitive id="fake" class="ocf" provider="heartbeat" type="Dummy"/>
1f016a
+        <attributes CRM_meta_name="stop" CRM_meta_timeout="20000" />
1f016a
+      </rsc_op>
1f016a
+    </action_set>
1f016a
+    <inputs/>
1f016a
+  </synapse>
1f016a
+  <synapse id="7">
1f016a
+    <action_set>
1f016a
+      <pseudo_event id="1" operation="all_stopped" operation_key="all_stopped">
1f016a
+        <attributes />
1f016a
+      </pseudo_event>
1f016a
+    </action_set>
1f016a
+    <inputs>
1f016a
+      <trigger>
1f016a
+        <rsc_op id="11" operation="stop" operation_key="fake_stop_0" on_node="rhel7-alt4" on_node_uuid="rhel7-alt4" router_node="rhel7-alt1"/>
1f016a
+      </trigger>
1f016a
+    </inputs>
1f016a
+  </synapse>
1f016a
+</transition_graph>
1f016a
diff --git a/pengine/test10/remote-recover.scores b/pengine/test10/remote-recover.scores
1f016a
new file mode 100644
1f016a
index 0000000..10fc7fd
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-recover.scores
1f016a
@@ -0,0 +1,13 @@
1f016a
+Allocation scores:
1f016a
+native_color: fake allocation score on rhel7-alt1: 0
1f016a
+native_color: fake allocation score on rhel7-alt2: 0
1f016a
+native_color: fake allocation score on rhel7-alt3: 0
1f016a
+native_color: fake allocation score on rhel7-alt4: INFINITY
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt1: 0
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt2: 0
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt3: 0
1f016a
+native_color: rhel7-alt4 allocation score on rhel7-alt4: -INFINITY
1f016a
+native_color: shooter allocation score on rhel7-alt1: 0
1f016a
+native_color: shooter allocation score on rhel7-alt2: 0
1f016a
+native_color: shooter allocation score on rhel7-alt3: 0
1f016a
+native_color: shooter allocation score on rhel7-alt4: -INFINITY
1f016a
diff --git a/pengine/test10/remote-recover.summary b/pengine/test10/remote-recover.summary
1f016a
new file mode 100644
1f016a
index 0000000..8fd7480
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-recover.summary
1f016a
@@ -0,0 +1,36 @@
1f016a
+
1f016a
+Current cluster status:
1f016a
+Node rhel7-alt2 (2): standby
1f016a
+RemoteNode rhel7-alt4: UNCLEAN (offline)
1f016a
+Online: [ rhel7-alt1 ]
1f016a
+OFFLINE: [ rhel7-alt3 ]
1f016a
+
1f016a
+ shooter	(stonith:fence_xvm):	Stopped 
1f016a
+ rhel7-alt4	(ocf::pacemaker:remote):	Stopped 
1f016a
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
1f016a
+
1f016a
+Transition Summary:
1f016a
+ * Start   shooter	(rhel7-alt1)
1f016a
+ * Start   rhel7-alt4	(rhel7-alt1)
1f016a
+ * Restart fake	(Started rhel7-alt4)
1f016a
+
1f016a
+Executing cluster transition:
1f016a
+ * Resource action: shooter         start on rhel7-alt1
1f016a
+ * Resource action: rhel7-alt4      start on rhel7-alt1
1f016a
+ * Resource action: fake            stop on rhel7-alt4
1f016a
+ * Pseudo action:   all_stopped
1f016a
+ * Resource action: shooter         monitor=60000 on rhel7-alt1
1f016a
+ * Resource action: rhel7-alt4      monitor=60000 on rhel7-alt1
1f016a
+ * Resource action: fake            start on rhel7-alt4
1f016a
+ * Resource action: fake            monitor=10000 on rhel7-alt4
1f016a
+
1f016a
+Revised cluster status:
1f016a
+Node rhel7-alt2 (2): standby
1f016a
+Online: [ rhel7-alt1 ]
1f016a
+OFFLINE: [ rhel7-alt3 ]
1f016a
+RemoteOnline: [ rhel7-alt4 ]
1f016a
+
1f016a
+ shooter	(stonith:fence_xvm):	Started rhel7-alt1 
1f016a
+ rhel7-alt4	(ocf::pacemaker:remote):	Started rhel7-alt1 
1f016a
+ fake	(ocf::heartbeat:Dummy):	Started rhel7-alt4 
1f016a
+
1f016a
diff --git a/pengine/test10/remote-recover.xml b/pengine/test10/remote-recover.xml
1f016a
new file mode 100644
1f016a
index 0000000..1a83dd9
1f016a
--- /dev/null
1f016a
+++ b/pengine/test10/remote-recover.xml
1f016a
@@ -0,0 +1,114 @@
1f016a
+<cib crm_feature_set="3.0.9" validate-with="pacemaker-2.1" epoch="13" num_updates="8" admin_epoch="0" cib-last-written="Tue Sep 23 16:28:22 2014" have-quorum="1" dc-uuid="2">
1f016a
+  <configuration>
1f016a
+    <crm_config>
1f016a
+      <cluster_property_set id="cib-bootstrap-options">
1f016a
+        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.12-6da3f72"/>
1f016a
+        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
1f016a
+        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="phd"/>
1f016a
+        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1411504087"/>
1f016a
+      </cluster_property_set>
1f016a
+    </crm_config>
1f016a
+    <nodes>
1f016a
+      <node id="1" uname="rhel7-alt1">
1f016a
+        <instance_attributes id="nodes-1">
1f016a
+        </instance_attributes>
1f016a
+      </node>
1f016a
+      <node id="2" uname="rhel7-alt2">
1f016a
+        <instance_attributes id="nodes-2">
1f016a
+          <nvpair id="nodes-2-standby" name="standby" value="on"/>
1f016a
+        </instance_attributes>
1f016a
+      </node>
1f016a
+      <node id="3" uname="rhel7-alt3"/>
1f016a
+    </nodes>
1f016a
+    <resources>
1f016a
+      <primitive class="stonith" id="shooter" type="fence_xvm">
1f016a
+        <instance_attributes id="shooter-instance_attributes"/>
1f016a
+        <operations>
1f016a
+          <op id="shooter-monitor-interval-60s" interval="60s" name="monitor"/>
1f016a
+        </operations>
1f016a
+      </primitive>
1f016a
+      <primitive class="ocf" id="rhel7-alt4" provider="pacemaker" type="remote">
1f016a
+        <instance_attributes id="rhel7-alt4-instance_attributes"/>
1f016a
+        <operations>
1f016a
+          <op id="rhel7-alt4-start-timeout-15" interval="0s" name="start" timeout="15"/>
1f016a
+          <op id="rhel7-alt4-stop-timeout-15" interval="0s" name="stop" timeout="15"/>
1f016a
+          <op id="rhel7-alt4-monitor-timeout-15" interval="60s" name="monitor" timeout="15"/>
1f016a
+        </operations>
1f016a
+      </primitive>
1f016a
+      <primitive class="ocf" id="fake" provider="heartbeat" type="Dummy">
1f016a
+        <instance_attributes id="fake-instance_attributes"/>
1f016a
+        <operations>
1f016a
+          <op id="fake-start-timeout-20" interval="0s" name="start" timeout="20"/>
1f016a
+          <op id="fake-stop-timeout-20" interval="0s" name="stop" timeout="20"/>
1f016a
+          <op id="fake-monitor-interval-10" interval="10" name="monitor" timeout="20"/>
1f016a
+        </operations>
1f016a
+      </primitive>
1f016a
+    </resources>
1f016a
+    <constraints>
1f016a
+      <rsc_location id="location-fake-rhel7-alt4-INFINITY" node="rhel7-alt4" rsc="fake" score="INFINITY"/>
1f016a
+    </constraints>
1f016a
+  </configuration>
1f016a
+  <status>
1f016a
+    <node_state id="2" uname="rhel7-alt2" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
1f016a
+      <transient_attributes id="2">
1f016a
+        <instance_attributes id="status-2">
1f016a
+          <nvpair id="status-2-shutdown" name="shutdown" value="0"/>
1f016a
+          <nvpair id="status-2-probe_complete" name="probe_complete" value="true"/>
1f016a
+        </instance_attributes>
1f016a
+      </transient_attributes>
1f016a
+      <lrm id="2">
1f016a
+        <lrm_resources>
1f016a
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
1f016a
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:8:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="14" rc-code="0" op-status="0" interval="0" last-run="1411503701" last-rc-change="1411503701" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
1f016a
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.9" transition-key="8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:5:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
1f016a
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="7" op-status="0" interval="0" last-run="1411504086" last-rc-change="1411504086" exec-time="34" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt2" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+          </lrm_resource>
1f016a
+        </lrm_resources>
1f016a
+      </lrm>
1f016a
+    </node_state>
1f016a
+    <node_state id="1" uname="rhel7-alt1" in_ccm="true" crmd="online" crm-debug-origin="post_cache_update" join="member" expected="member">
1f016a
+      <lrm id="1">
1f016a
+        <lrm_resources>
1f016a
+          <lrm_resource id="shooter" type="fence_xvm" class="stonith">
1f016a
+            <lrm_rsc_op id="shooter_last_0" operation_key="shooter_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;11:23:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="19" rc-code="0" op-status="0" interval="0" last-run="1411504102" last-rc-change="1411504102" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
1f016a
+            <lrm_rsc_op id="shooter_monitor_60000" operation_key="shooter_monitor_60000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:15:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="12" rc-code="0" op-status="0" interval="60000" last-rc-change="1411504079" exec-time="10" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt1"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="rhel7-alt4" type="remote" class="ocf" provider="pacemaker">
1f016a
+            <lrm_rsc_op id="rhel7-alt4_last_0" operation_key="rhel7-alt4_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;9:15:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="1" rc-code="7" op-status="0" interval="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1"/>
1f016a
+          </lrm_resource>
1f016a
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
1f016a
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:7;8:18:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="17" rc-code="7" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt1" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+          </lrm_resource>
1f016a
+        </lrm_resources>
1f016a
+      </lrm>
1f016a
+      <transient_attributes id="1">
1f016a
+        <instance_attributes id="status-1">
1f016a
+          <nvpair id="status-1-shutdown" name="shutdown" value="0"/>
1f016a
+          <nvpair id="status-1-probe_complete" name="probe_complete" value="true"/>
1f016a
+        </instance_attributes>
1f016a
+      </transient_attributes>
1f016a
+    </node_state>
1f016a
+    <node_state id="3" uname="rhel7-alt3" in_ccm="false" crmd="offline" crm-debug-origin="send_stonith_update" join="down" expected="down"/>
1f016a
+    <node_state id="rhel7-alt4" remote_node="true" uname="rhel7-alt4" crm-debug-origin="post_cache_update">
1f016a
+      <lrm id="rhel7-alt4">
1f016a
+        <lrm_resources>
1f016a
+          <lrm_resource id="fake" type="Dummy" class="ocf" provider="heartbeat">
1f016a
+            <lrm_rsc_op id="fake_last_failure_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+            <lrm_rsc_op id="fake_last_0" operation_key="fake_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;12:21:7:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="34" rc-code="0" op-status="0" interval="0" last-run="1411504087" last-rc-change="1411504087" exec-time="29" queue-time="1" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" on_node="rhel7-alt3" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
1f016a
+            <lrm_rsc_op id="fake_monitor_10000" operation_key="fake_monitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.9" transition-key="16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" transition-magic="0:0;16:22:0:68028369-58a1-453d-bcdd-c1d1ade99959" call-id="35" rc-code="0" op-status="0" interval="10000" last-rc-change="1411504087" exec-time="29" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd" on_node="rhel7-alt3"/>
1f016a
+          </lrm_resource>
1f016a
+        </lrm_resources>
1f016a
+      </lrm>
1f016a
+      <transient_attributes id="rhel7-alt4">
1f016a
+        <instance_attributes id="status-rhel7-alt4">
1f016a
+          <nvpair id="status-rhel7-alt4-probe_complete" name="probe_complete" value="true"/>
1f016a
+          <nvpair id="status-rhel7-alt4-last-failure-fake" name="last-failure-fake" value="1411503989"/>
1f016a
+        </instance_attributes>
1f016a
+      </transient_attributes>
1f016a
+    </node_state>
1f016a
+  </status>
1f016a
+</cib>
1f016a
diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c
1f016a
index 60d39b6..c37b096 100644
1f016a
--- a/tools/crm_attribute.c
1f016a
+++ b/tools/crm_attribute.c
1f016a
@@ -235,6 +235,7 @@ main(int argc, char **argv)
1f016a
         /* we're updating cluster options - dont populate dest_node */
1f016a
         type = XML_CIB_TAG_CRMCONFIG;
1f016a
 
1f016a
+    } else if (safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) {
1f016a
     } else if (safe_str_neq(type, XML_CIB_TAG_TICKETS)) {
1f016a
         if (dest_uname == NULL) {
1f016a
             dest_uname = get_node_name(0);
1f016a
diff --git a/tools/crm_mon.upstart.in b/tools/crm_mon.upstart.in
1f016a
new file mode 100644
1f016a
index 0000000..ef0fe7a
1f016a
--- /dev/null
1f016a
+++ b/tools/crm_mon.upstart.in
1f016a
@@ -0,0 +1,39 @@
1f016a
+# crm_mon - Daemon for pacemaker monitor
1f016a
+#
1f016a
+#
1f016a
+
1f016a
+kill timeout 3600
1f016a
+respawn
1f016a
+respawn limit 10 3600
1f016a
+
1f016a
+expect fork
1f016a
+
1f016a
+env prog=crm_mon
1f016a
+env rpm_sysconf=@sysconfdir@/sysconfig/crm_mon
1f016a
+env rpm_lockfile=@localstatedir@/lock/subsys/crm_mon
1f016a
+env deb_sysconf=@sysconfdir@/default/crm_mon
1f016a
+env deb_lockfile=@localstatedir@/lock/crm_mon
1f016a
+
1f016a
+
1f016a
+script
1f016a
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
1f016a
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
1f016a
+    exec $prog $OPTIONS
1f016a
+end script
1f016a
+
1f016a
+post-start script
1f016a
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
1f016a
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
1f016a
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
1f016a
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
1f016a
+    touch $LOCK_FILE
1f016a
+end script
1f016a
+
1f016a
+post-stop script
1f016a
+    [ -f "$rpm_sysconf" ] && . $rpm_sysconf
1f016a
+    [ -f "$deb_sysconf" ] && . $deb_sysconf
1f016a
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/sysconfig ] && LOCK_FILE="$rpm_lockfile"
1f016a
+    [ -z "$LOCK_FILE" -a -d @sysconfdir@/default ] && LOCK_FILE="$deb_lockfile"
1f016a
+    rm -f $LOCK_FILE
1f016a
+end script
1f016a
+
1f016a
diff --git a/tools/crm_resource.c b/tools/crm_resource.c
1f016a
index 6537520..56583e0 100644
1f016a
--- a/tools/crm_resource.c
1f016a
+++ b/tools/crm_resource.c
1f016a
@@ -2214,11 +2214,15 @@ main(int argc, char **argv)
1f016a
         }
1f016a
 
1f016a
     } else if (rsc_cmd == 'C') {
1f016a
-#if 0
1f016a
+#if HAVE_ATOMIC_ATTRD
1f016a
         xmlNode *cmd = create_request(CRM_OP_REPROBE, NULL, host_uname,
1f016a
                                       CRM_SYSTEM_CRMD, crm_system_name, our_pid);
1f016a
 
1f016a
-        crm_debug("Re-checking the state of all resources on %s", host_uname);
1f016a
+        crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes");
1f016a
+
1f016a
+        rc = attrd_update_delegate(
1f016a
+            NULL, 'u', host_uname, "fail-count-*", NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, FALSE);
1f016a
+
1f016a
         if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) {
1f016a
             start_mainloop();
1f016a
         }