commit d9b95435059189843e1fb7b1f7530fc163fdfc13
Author: David Vossel <dvossel@redhat.com>
Date: Wed Sep 25 17:02:50 2013 -0400
properly set remote node attributes
diff --git a/crmd/lrm.c b/crmd/lrm.c
index 0254a9f..7157e24 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -367,7 +367,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
}
if (counter > 0) {
- do_crm_log(log_level, "%d pending LRM operations at %s%s", counter, when);
+ do_crm_log(log_level, "%d pending LRM operations at %s", counter, when);
if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
diff --git a/crmd/membership.c b/crmd/membership.c
index 370d1a2..e2bcd45 100644
--- a/crmd/membership.c
+++ b/crmd/membership.c
@@ -260,6 +260,13 @@ populate_cib_nodes(enum node_update_flags flags, const char *source)
do_update_node_cib(node, flags, node_list, source);
}
+ if (crm_remote_peer_cache) {
+ g_hash_table_iter_init(&iter, crm_remote_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ do_update_node_cib(node, flags, node_list, source);
+ }
+ }
+
fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
last_peer_update = call_id;
diff --git a/crmd/messages.c b/crmd/messages.c
index 9aa69cc..057383a 100644
--- a/crmd/messages.c
+++ b/crmd/messages.c
@@ -930,7 +930,7 @@ send_msg_via_ipc(xmlNode * msg, const char *sys)
crmd_proxy_send(sys, msg);
} else {
- crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
+ crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
send_ok = FALSE;
}
diff --git a/crmd/pengine.c b/crmd/pengine.c
index 5546d7e..2f3eba8 100644
--- a/crmd/pengine.c
+++ b/crmd/pengine.c
@@ -271,6 +271,9 @@ do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
CRM_LOG_ASSERT(output != NULL);
+ /* refresh our remote-node cache when the pengine is invoked */
+ crm_remote_peer_cache_refresh(output);
+
crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index d38d7f0..42ea043 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -333,7 +333,10 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
} else {
/* make sure we have a clean status section to start with */
+ lrm_state_reset_tables(lrm_state);
remote_init_cib_status(lrm_state);
+ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
+ erase_status_tag(lrm_state->node_name, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
cmd->rc = PCMK_EXECRA_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
@@ -430,15 +433,6 @@ handle_remote_ra_exec(gpointer user_data)
g_list_free_1(first);
if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
- xmlNode *status = create_xml_node(NULL, XML_CIB_TAG_STATE);
-
- /* clear node status in cib */
- crm_xml_add(status, XML_ATTR_ID, lrm_state->node_name);
- lrm_state_reset_tables(lrm_state);
- fsa_cib_delete(XML_CIB_TAG_STATUS, status, cib_quorum_override, rc, NULL);
- crm_info("Forced a remote LRM refresh before connection start: call=%d", rc);
- crm_log_xml_trace(status, "CLEAR LRM");
- free_xml(status);
rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
if (rc == 0) {
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 54fae04..239af63 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -390,16 +390,7 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action,
if (safe_str_eq(XML_CIB_TAG_STATE, kind)
|| safe_str_eq(XML_CIB_TAG_NODE, kind)) {
- if (crm_is_true(crm_element_value(search, XML_NODE_IS_REMOTE))) {
- /* Remote node uname and uuids are the same.
- * We also don't want them to be present in the
- * peer cache, so we shouldn't look them up with
- * crm_peer_uname()
- */
- uname = ID(search);
- } else {
- uname = crm_peer_uname(ID(search));
- }
+ uname = crm_peer_uname(ID(search));
break;
}
search = search->parent;
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 54b7f58..960c3d0 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -32,6 +32,7 @@
extern gboolean crm_have_quorum;
extern GHashTable *crm_peer_cache;
+extern GHashTable *crm_remote_peer_cache;
extern unsigned long long crm_peer_seq;
# ifndef CRM_SERVICE
@@ -55,12 +56,16 @@ enum crm_join_phase
};
/* *INDENT-ON* */
+/* Bit flags stored in the 'flags' member of crm_node_t */
+enum crm_node_flags
+{
+/* set on the entries that crm_remote_peer_cache_refresh() places in crm_remote_peer_cache */
+crm_remote_node = 0x0001,
+};
typedef struct crm_peer_node_s {
uint32_t id; /* Only used by corosync derivatives */
uint64_t born; /* Only used by heartbeat and the legacy plugin */
uint64_t last_seen;
- uint64_t flags; /* Unused, but might be a good place to specify 'remote' */
+ uint64_t flags; /* Specified by crm_node_flags enum */
int32_t votes; /* Only used by the legacy plugin */
uint32_t processes;
@@ -124,11 +129,25 @@ enum crm_ais_msg_types {
crm_msg_pe = 8,
crm_msg_stonith_ng = 9,
};
+
+/* Bitmask passed as the 'flags' argument of crm_get_peer_full() to select
+ * which peer caches are searched; the values may be OR'ed together */
+enum crm_get_peer_flags {
+/* fall back to crm_get_peer(), which only searches cluster nodes */
+CRM_GET_PEER_CLUSTER = 0x0001,
+/* look the name up in crm_remote_peer_cache first */
+CRM_GET_PEER_REMOTE = 0x0002,
+};
/* *INDENT-ON* */
gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service,
xmlNode * data, gboolean ordered);
+
+/* Initialize and refresh the remote peer cache from a cib config */
+void crm_remote_peer_cache_refresh(xmlNode *cib);
+
+/* allows filtering of remote and cluster nodes using crm_get_peer_flags */
+crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
+
+/* only searches cluster nodes */
crm_node_t *crm_get_peer(unsigned int id, const char *uname);
guint crm_active_peers(void);
diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c
index 4af077c..d1e1b74 100644
--- a/lib/cib/cib_attrs.c
+++ b/lib/cib/cib_attrs.c
@@ -430,6 +430,8 @@ get_remote_node_uuid(cib_t * the_cib, const char *uname, char **uuid)
cib_sync_call | cib_scope_local | cib_xpath, NULL);
free(xpath_string);
free(xml_search);
+ xml_search = NULL;
+ xpath_string = NULL;
if (rc != pcmk_ok) {
len = strlen(REMOTE_NODE_XPATH2) + strlen(uname) + 1;
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 5820c8d..5b743f9 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -361,6 +361,11 @@ crm_peer_uname(const char *uuid)
CRM_CHECK(uuid != NULL, return NULL);
+ /* remote nodes have the same uname and uuid */
+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
+ return uuid;
+ }
+
/* avoid blocking calls where possible */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index bc1684e..e3082b4 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -33,9 +33,56 @@
#include <crm/stonith-ng.h>
GHashTable *crm_peer_cache = NULL;
+GHashTable *crm_remote_peer_cache = NULL;
unsigned long long crm_peer_seq = 0;
gboolean crm_have_quorum = FALSE;
+/*
+ * Add one entry to crm_remote_peer_cache for every element of 'cib' that is
+ * matched by 'xpath' and carries the attribute 'field'.
+ *
+ * cib:   XML tree to search
+ * xpath: XPath expression selecting the candidate elements
+ * field: attribute whose value becomes both the uname and the uuid of the entry
+ * flags: value stored in the new entry's 'flags' member (enum crm_node_flags)
+ *
+ * Elements without 'field' are skipped.  An existing entry with the same name
+ * is replaced.
+ */
+static void
+remote_cache_refresh_helper(xmlNode *cib, const char *xpath, const char *field, int flags)
+{
+    const char *remote = NULL;
+    crm_node_t *node = NULL;
+    xmlXPathObjectPtr xpathObj = NULL;
+    int max = 0;
+    int lpc = 0;
+
+    xpathObj = xpath_search(cib, xpath);
+    max = numXpathResults(xpathObj);
+    for (lpc = 0; lpc < max; lpc++) {
+        xmlNode *xml = getXpathResult(xpathObj, lpc);
+
+        CRM_CHECK(xml != NULL, continue);
+
+        remote = crm_element_value(xml, field);
+        if (remote) {
+            crm_trace("added %s to remote cache", remote);
+            node = calloc(1, sizeof(crm_node_t));
+            /* check the allocation before the first write through 'node' */
+            CRM_ASSERT(node);
+            node->flags = flags;
+            node->uname = strdup(remote);
+            node->uuid = strdup(remote);
+            node->state = strdup(CRM_NODE_MEMBER);
+            /* the key aliases node->uname; the cache is created without a
+             * key destructor, so only the value destructor releases it */
+            g_hash_table_replace(crm_remote_peer_cache, node->uname, node);
+        }
+    }
+    freeXpathObject(xpathObj);
+}
+
+/*
+ * Rebuild crm_remote_peer_cache from a CIB document.
+ *
+ * The cache is emptied and then repopulated from two places in 'cib':
+ *  - 'remote-node' meta attributes of resources in the configuration section
+ *  - node state entries in the status section marked remote_node='true'
+ *
+ * cib: XML tree of the CIB to scan
+ */
+void crm_remote_peer_cache_refresh(xmlNode *cib)
+{
+    const char *xpath = NULL;
+
+    /* make sure the cache exists before touching it; crm_peer_init() only
+     * creates the tables that are still NULL */
+    crm_peer_init();
+
+    g_hash_table_remove_all(crm_remote_peer_cache);
+
+    /* remote nodes associated with a cluster resource */
+    xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR "[@name='remote-node']";
+    remote_cache_refresh_helper(cib, xpath, "value", crm_remote_node);
+
+    /* remote nodes seen in the status section */
+    xpath = "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE "[@remote_node='true']";
+    remote_cache_refresh_helper(cib, xpath, "id", crm_remote_node);
+}
+
gboolean
crm_is_peer_active(const crm_node_t * node)
{
@@ -146,6 +193,10 @@ crm_peer_init(void)
if (crm_peer_cache == NULL) {
crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_crm_node);
}
+
+ if (crm_remote_peer_cache == NULL) {
+ crm_remote_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node);
+ }
}
void
@@ -156,6 +207,12 @@ crm_peer_destroy(void)
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
+
+ if (crm_remote_peer_cache != NULL) {
+ crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
+ g_hash_table_destroy(crm_remote_peer_cache);
+ crm_remote_peer_cache = NULL;
+ }
}
void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
@@ -186,6 +243,25 @@ static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer use
return FALSE;
}
+/*
+ * Look up a peer, optionally consulting the remote-node cache in addition to
+ * the cluster-node cache.
+ *
+ * id:    cluster node id, or 0 when not known
+ * uname: node name, or NULL when not known ('id' and 'uname' may not both be unset)
+ * flags: bitmask of enum crm_get_peer_flags selecting which caches to search
+ *
+ * Returns the crm_remote_peer_cache entry keyed by 'uname' when
+ * CRM_GET_PEER_REMOTE is set and such an entry exists; otherwise the result
+ * of crm_get_peer() when CRM_GET_PEER_CLUSTER is set; otherwise NULL.
+ */
+crm_node_t *
+crm_get_peer_full(unsigned int id, const char *uname, int flags)
+{
+    crm_node_t *node = NULL;
+
+    CRM_ASSERT(id > 0 || uname != NULL);
+
+    crm_peer_init();
+
+    /* The remote cache is keyed by name only and is built on a string
+     * hash/equality pair, which must not be handed a NULL key, so an
+     * id-only lookup skips it. */
+    if (uname != NULL && (flags & CRM_GET_PEER_REMOTE)) {
+        node = g_hash_table_lookup(crm_remote_peer_cache, uname);
+    }
+
+    if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
+        node = crm_get_peer(id, uname);
+    }
+    return node;
+}
+
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index bdaf18c..e577293 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -89,6 +89,11 @@ typedef struct lrmd_private_s {
gnutls_psk_client_credentials_t psk_cred_c;
int sock;
+ /* since tls requires a round trip across the network for a
+ * request/reply, there are times where we just want to be able
+ * to send a request from the client and not wait around (or even care
+ * about) what the reply is. */
+ int expected_late_replies;
GList *pending_notify;
crm_trigger_t *process_notify;
#endif
@@ -241,9 +246,7 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
/* this is proxy business */
lrmd_internal_proxy_dispatch(lrmd, msg);
return 1;
- }
-
- if (!native->callback) {
+ } else if (!native->callback) {
/* no callback set */
crm_trace("notify event received but client has not set callback");
return 1;
@@ -371,7 +374,19 @@ lrmd_tls_dispatch(gpointer userdata)
xml = crm_remote_parse_buffer(native->remote);
}
while (xml) {
- lrmd_dispatch_internal(lrmd, xml);
+ const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
+ if (safe_str_eq(msg_type, "notify")) {
+ lrmd_dispatch_internal(lrmd, xml);
+ } else if (safe_str_eq(msg_type, "reply")) {
+ if (native->expected_late_replies > 0) {
+ native->expected_late_replies--;
+ } else {
+ int reply_id = 0;
+ crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
+ /* if this happens, we want to know about it */
+ crm_err("Got outdated reply %d", reply_id);
+ }
+ }
free_xml(xml);
xml = crm_remote_parse_buffer(native->remote);
}
@@ -617,7 +632,11 @@ lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int
free_xml(xml);
xml = NULL;
} else if (reply_id != expected_reply_id) {
- crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+ if (native->expected_late_replies > 0) {
+ native->expected_late_replies--;
+ } else {
+ crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
+ }
free_xml(xml);
xml = NULL;
}
@@ -724,6 +743,12 @@ lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send(lrmd, msg);
+ if (rc == pcmk_ok) {
+ /* we don't want to wait around for the reply, but
+ * since the request/reply protocol needs to behave the same
+ * as libqb, a reply will eventually come later anyway. */
+ native->expected_late_replies++;
+ }
break;
#endif
default:
diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c
index bbf9b24..3a51a5b 100644
--- a/lrmd/ipc_proxy.c
+++ b/lrmd/ipc_proxy.c
@@ -313,6 +313,8 @@ ipc_proxy_remove_provider(crm_client_t *ipc_proxy)
GHashTableIter iter;
crm_client_t *ipc_client = NULL;
char *key = NULL;
+ GList *remove_these = NULL;
+ GListPtr gIter = NULL;
if (ipc_providers == NULL) {
return;
@@ -326,9 +328,19 @@ ipc_proxy_remove_provider(crm_client_t *ipc_proxy)
if (safe_str_eq(proxy_id, ipc_proxy->id)) {
crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.",
ipc_client->id, ipc_client->pid);
- qb_ipcs_disconnect(ipc_client->ipcs);
+ /* we can't remove during the iteration, so copy items
+ * to a list we can destroy later */
+ remove_these = g_list_append(remove_these, ipc_client);
}
}
+
+ for (gIter = remove_these; gIter != NULL; gIter = gIter->next) {
+ ipc_client = gIter->data;
+ qb_ipcs_disconnect(ipc_client->ipcs);
+ }
+
+ /* just frees the list, not the elements in the list */
+ g_list_free(remove_these);
}
void
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index a4747cb..b5bbea0 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1297,6 +1297,7 @@ process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
+ do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
diff --git a/pengine/allocate.c b/pengine/allocate.c
index cf8f4d4..bfa8e7b 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1578,6 +1578,8 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
container = remote_rsc->container;
if (safe_str_eq(action->task, "monitor") ||
safe_str_eq(action->task, "start") ||
+ safe_str_eq(action->task, "promote") ||
+ safe_str_eq(action->task, "demote") ||
safe_str_eq(action->task, CRM_OP_LRM_REFRESH) ||
safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) ||
safe_str_eq(action->task, "delete")) {