Blob Blame History Raw
commit d9b95435059189843e1fb7b1f7530fc163fdfc13
Author: David Vossel <dvossel@redhat.com>
Date:   Wed Sep 25 17:02:50 2013 -0400

    properly set remote node attributes

diff --git a/crmd/lrm.c b/crmd/lrm.c
index 0254a9f..7157e24 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -367,7 +367,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
     }
 
     if (counter > 0) {
-        do_crm_log(log_level, "%d pending LRM operations at %s%s", counter, when);
+        do_crm_log(log_level, "%d pending LRM operations at %s", counter, when);
 
         if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
             g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
diff --git a/crmd/membership.c b/crmd/membership.c
index 370d1a2..e2bcd45 100644
--- a/crmd/membership.c
+++ b/crmd/membership.c
@@ -260,6 +260,13 @@ populate_cib_nodes(enum node_update_flags flags, const char *source)
             do_update_node_cib(node, flags, node_list, source);
         }
 
+        if (crm_remote_peer_cache) {
+            g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+                do_update_node_cib(node, flags, node_list, source);
+            }
+        }
+
         fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
         fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
         last_peer_update = call_id;
diff --git a/crmd/messages.c b/crmd/messages.c
index 9aa69cc..057383a 100644
--- a/crmd/messages.c
+++ b/crmd/messages.c
@@ -930,7 +930,7 @@ send_msg_via_ipc(xmlNode * msg, const char *sys)
         crmd_proxy_send(sys, msg);
 
     } else {
-        crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
+        crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
         send_ok = FALSE;
     }
 
diff --git a/crmd/pengine.c b/crmd/pengine.c
index 5546d7e..2f3eba8 100644
--- a/crmd/pengine.c
+++ b/crmd/pengine.c
@@ -271,6 +271,9 @@ do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
 
     CRM_LOG_ASSERT(output != NULL);
 
+    /* refresh our remote-node cache when the pengine is invoked */
+    crm_remote_peer_cache_refresh(output);
+
     crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
     crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
 
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index d38d7f0..42ea043 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -333,7 +333,10 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
 
         } else {
             /* make sure we have a clean status section to start with */
+            lrm_state_reset_tables(lrm_state);
             remote_init_cib_status(lrm_state);
+            erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
+            erase_status_tag(lrm_state->node_name, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
 
             cmd->rc = PCMK_EXECRA_OK;
             cmd->op_status = PCMK_LRM_OP_DONE;
@@ -430,15 +433,6 @@ handle_remote_ra_exec(gpointer user_data)
         g_list_free_1(first);
 
         if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
-            xmlNode *status = create_xml_node(NULL, XML_CIB_TAG_STATE);
-
-            /* clear node status in cib */
-            crm_xml_add(status, XML_ATTR_ID, lrm_state->node_name);
-            lrm_state_reset_tables(lrm_state);
-            fsa_cib_delete(XML_CIB_TAG_STATUS, status, cib_quorum_override, rc, NULL);
-            crm_info("Forced a remote LRM refresh before connection start: call=%d", rc);
-            crm_log_xml_trace(status, "CLEAR LRM");
-            free_xml(status);
 
             rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
             if (rc == 0) {
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 54fae04..239af63 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -390,16 +390,7 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action,
             if (safe_str_eq(XML_CIB_TAG_STATE, kind)
                || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
 
-                if (crm_is_true(crm_element_value(search, XML_NODE_IS_REMOTE))) {
-                    /* Remote node uname and uuids are the same.
-                     * We also don't want them to be present in the
-                     * peer cache, so we shouldn't look them up with
-                     * crm_peer_uname()
-                     */
-                    uname = ID(search);
-                } else {
-                    uname = crm_peer_uname(ID(search));
-                }
+                uname = crm_peer_uname(ID(search));
                 break;
             }
             search = search->parent;
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 54b7f58..960c3d0 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -32,6 +32,7 @@
 
 extern gboolean crm_have_quorum;
 extern GHashTable *crm_peer_cache;
+extern GHashTable *crm_remote_peer_cache;
 extern unsigned long long crm_peer_seq;
 
 #  ifndef CRM_SERVICE
@@ -55,12 +56,16 @@ enum crm_join_phase
 };
 
 /* *INDENT-ON* */
+enum crm_node_flags
+{
+    crm_remote_node     = 0x0001,   /* stored in crm_node_t flags for entries added to the remote peer cache */
+};
 
 typedef struct crm_peer_node_s {
     uint32_t id;                /* Only used by corosync derivatives */
     uint64_t born;              /* Only used by heartbeat and the legacy plugin */
     uint64_t last_seen;
-    uint64_t flags;             /* Unused, but might be a good place to specify 'remote' */
+    uint64_t flags;             /* Specified by crm_node_flags enum */
 
     int32_t votes;              /* Only used by the legacy plugin */
     uint32_t processes;
@@ -124,11 +129,25 @@ enum crm_ais_msg_types {
     crm_msg_pe       = 8,
     crm_msg_stonith_ng = 9,
 };
+
+/* bit flags for the 'flags' argument of crm_get_peer_full; may be OR'ed together */
+enum crm_get_peer_flags {
+    CRM_GET_PEER_CLUSTER   = 0x0001,    /* look the node up among cluster nodes (via crm_get_peer) */
+    CRM_GET_PEER_REMOTE    = 0x0002,    /* look the node up in the remote peer cache (tried first) */
+};
 /* *INDENT-ON* */
 
 gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service,
                               xmlNode * data, gboolean ordered);
 
+
+/* Initialize and refresh the remote peer cache from a cib config */
+void crm_remote_peer_cache_refresh(xmlNode *cib);
+
+/* allows filtering of remote and cluster nodes using crm_get_peer_flags */
+crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
+
+/* only searches cluster nodes */
 crm_node_t *crm_get_peer(unsigned int id, const char *uname);
 
 guint crm_active_peers(void);
diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c
index 4af077c..d1e1b74 100644
--- a/lib/cib/cib_attrs.c
+++ b/lib/cib/cib_attrs.c
@@ -430,6 +430,8 @@ get_remote_node_uuid(cib_t * the_cib, const char *uname, char **uuid)
                          cib_sync_call | cib_scope_local | cib_xpath, NULL);
     free(xpath_string);
     free(xml_search);
+    xml_search = NULL;
+    xpath_string = NULL;
 
     if (rc != pcmk_ok) {
         len = strlen(REMOTE_NODE_XPATH2) + strlen(uname) + 1;
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 5820c8d..5b743f9 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -361,6 +361,11 @@ crm_peer_uname(const char *uuid)
 
     CRM_CHECK(uuid != NULL, return NULL);
 
+    /* remote nodes have the same uname and uuid */
+    if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
+        return uuid;
+    }
+
     /* avoid blocking calls where possible */
     g_hash_table_iter_init(&iter, crm_peer_cache);
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index bc1684e..e3082b4 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -33,9 +33,56 @@
 #include <crm/stonith-ng.h>
 
 GHashTable *crm_peer_cache = NULL;
+GHashTable *crm_remote_peer_cache = NULL;
 unsigned long long crm_peer_seq = 0;
 gboolean crm_have_quorum = FALSE;
 
+/* Add a remote peer cache entry, named by attribute 'field', for every node in 'cib' matching 'xpath' */
+static void
+remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field, int flags)
+{
+    const char *remote = NULL;
+    crm_node_t *node = NULL;
+    xmlXPathObjectPtr xpathObj = NULL;
+    int max = 0;
+    int lpc = 0;
+
+    xpathObj = xpath_search(cib, xpath);
+    max = numXpathResults(xpathObj);
+    for (lpc = 0; lpc < max; lpc++) {
+        xmlNode *xml = getXpathResult(xpathObj, lpc);
+
+        CRM_CHECK(xml != NULL, continue);
+        remote = crm_element_value(xml, field);
+        if (remote) {
+            crm_trace("added %s to remote cache", remote);
+            node = calloc(1, sizeof(crm_node_t));
+            CRM_ASSERT(node);   /* verify the allocation before the first write through node */
+            node->flags = flags;
+            node->uname = strdup(remote);   /* remote nodes use the same string for uname and uuid */
+            node->uuid = strdup(remote);
+            node->state = strdup(CRM_NODE_MEMBER);
+            g_hash_table_replace(crm_remote_peer_cache, node->uname, node);     /* uname doubles as the key */
+        }
+    }
+    freeXpathObject(xpathObj);
+}
+
+/* Rebuild the remote peer cache from 'cib': empty it, then re-add the remote nodes found by the xpaths below */
+void crm_remote_peer_cache_refresh(xmlNode *cib)
+{
+    const char *xpath = NULL;
+
+    crm_peer_init();    /* make sure the cache exists before it is emptied and refilled */
+    g_hash_table_remove_all(crm_remote_peer_cache);
+    /* remote nodes associated with a cluster resource */
+    xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR "[@name='remote-node']";
+    remote_cache_refresh_helper(cib, xpath, "value", crm_remote_node);
+    /* remote nodes seen in the status section */
+    xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE "[@remote_node='true']";
+    remote_cache_refresh_helper(cib, xpath, "id", crm_remote_node);
+}
+
 gboolean
 crm_is_peer_active(const crm_node_t * node)
 {
@@ -146,6 +193,10 @@ crm_peer_init(void)
     if (crm_peer_cache == NULL) {
         crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node);
     }
+
+    if (crm_remote_peer_cache == NULL) {
+        crm_remote_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node);
+    }
 }
 
 void
@@ -156,6 +207,12 @@ crm_peer_destroy(void)
         g_hash_table_destroy(crm_peer_cache);
         crm_peer_cache = NULL;
     }
+
+    if (crm_remote_peer_cache != NULL) {
+        crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
+        g_hash_table_destroy(crm_remote_peer_cache);
+        crm_remote_peer_cache = NULL;
+    }
 }
 
 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
@@ -186,6 +243,25 @@ static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer use
     return FALSE;
 }
 
+/* Look a node up by id/uname in the caches selected by 'flags' (crm_get_peer_flags); remote cache first */
+crm_node_t *
+crm_get_peer_full(unsigned int id, const char *uname, int flags)
+{
+    crm_node_t *node = NULL;
+
+    CRM_ASSERT(id > 0 || uname != NULL);
+    crm_peer_init();
+
+    /* the remote cache is keyed by uname, so skip it when only an id was given */
+    if (uname != NULL && (flags & CRM_GET_PEER_REMOTE)) {
+        node = g_hash_table_lookup(crm_remote_peer_cache, uname);
+    }
+    if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
+        node = crm_get_peer(id, uname);
+    }
+    return node;
+}
+
 /* coverity[-alloc] Memory is referenced in one or both hashtables */
 crm_node_t *
 crm_get_peer(unsigned int id, const char *uname)
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index bdaf18c..e577293 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -89,6 +89,11 @@ typedef struct lrmd_private_s {
     gnutls_psk_client_credentials_t psk_cred_c;
 
     int sock;
+    /* since tls requires a round trip across the network for a
+     * request/reply, there are times when we just want to send a
+     * request from the client without waiting for (or even caring
+     * about) the reply. */
+    int expected_late_replies;
     GList *pending_notify;
     crm_trigger_t *process_notify;
 #endif
@@ -241,9 +246,7 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
         /* this is proxy business */
         lrmd_internal_proxy_dispatch(lrmd, msg);
         return 1;
-    }
-
-    if (!native->callback) {
+    } else if (!native->callback) {
         /* no callback set */
         crm_trace("notify event received but client has not set callback");
         return 1;
@@ -371,7 +374,19 @@ lrmd_tls_dispatch(gpointer userdata)
         xml = crm_remote_parse_buffer(native->remote);
     }
     while (xml) {
-        lrmd_dispatch_internal(lrmd, xml);
+        const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
+        if (safe_str_eq(msg_type, "notify")) {
+            lrmd_dispatch_internal(lrmd, xml);
+        } else if (safe_str_eq(msg_type, "reply")) {
+            if (native->expected_late_replies > 0) {
+                native->expected_late_replies--;
+            } else {
+                int reply_id = 0;
+                crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
+                /* if this happens, we want to know about it */
+                crm_err("Got outdated reply %d", reply_id);
+            }
+        }
         free_xml(xml);
         xml = crm_remote_parse_buffer(native->remote);
     }
@@ -617,7 +632,11 @@ lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int
             free_xml(xml);
             xml = NULL;
         } else if (reply_id != expected_reply_id) {
-            crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+            if (native->expected_late_replies > 0) {
+                native->expected_late_replies--;
+            } else {
+                crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+            }
             free_xml(xml);
             xml = NULL;
         }
@@ -724,6 +743,12 @@ lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
 #ifdef HAVE_GNUTLS_GNUTLS_H
         case CRM_CLIENT_TLS:
             rc = lrmd_tls_send(lrmd, msg);
+            if (rc == pcmk_ok) {
+                /* we don't want to wait around for the reply, but
+                 * since the request/reply protocol needs to behave the same
+                 * as libqb, a reply will eventually come later anyway. */
+                native->expected_late_replies++;
+            }
             break;
 #endif
         default:
diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c
index bbf9b24..3a51a5b 100644
--- a/lrmd/ipc_proxy.c
+++ b/lrmd/ipc_proxy.c
@@ -313,6 +313,8 @@ ipc_proxy_remove_provider(crm_client_t *ipc_proxy)
     GHashTableIter iter;
     crm_client_t *ipc_client = NULL;
     char *key = NULL;
+    GList *remove_these = NULL;
+    GListPtr gIter = NULL;
 
     if (ipc_providers == NULL) {
         return;
@@ -326,9 +328,19 @@ ipc_proxy_remove_provider(crm_client_t *ipc_proxy)
         if (safe_str_eq(proxy_id, ipc_proxy->id)) {
             crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.",
                 ipc_client->id, ipc_client->pid);
-            qb_ipcs_disconnect(ipc_client->ipcs);
+            /* we can't remove during the iteration, so copy items
+             * to a list we can destroy later */
+            remove_these = g_list_append(remove_these, ipc_client);
         }
     }
+
+    for (gIter = remove_these; gIter != NULL; gIter = gIter->next) {
+        ipc_client = gIter->data;
+        qb_ipcs_disconnect(ipc_client->ipcs);
+    }
+
+    /* just frees the list, not the elements in the list */
+    g_list_free(remove_these);
 }
 
 void
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index a4747cb..b5bbea0 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1297,6 +1297,7 @@ process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
         do_reply = 1;
     } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
         do_notify = 1;
+        do_reply = 1;
     } else {
         rc = -EOPNOTSUPP;
         do_reply = 1;
diff --git a/pengine/allocate.c b/pengine/allocate.c
index cf8f4d4..bfa8e7b 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1578,6 +1578,8 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
         container = remote_rsc->container;
         if (safe_str_eq(action->task, "monitor") ||
             safe_str_eq(action->task, "start") ||
+            safe_str_eq(action->task, "promote") ||
+            safe_str_eq(action->task, "demote") ||
             safe_str_eq(action->task, CRM_OP_LRM_REFRESH) ||
             safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) ||
             safe_str_eq(action->task, "delete")) {