|
 |
1f016a |
diff --git a/GNUmakefile b/GNUmakefile
|
|
 |
1f016a |
index b17fb4c..f28dea8 100644
|
|
 |
1f016a |
--- a/GNUmakefile
|
|
 |
1f016a |
+++ b/GNUmakefile
|
|
 |
1f016a |
@@ -58,6 +58,8 @@ BUILD_COUNTER ?= build.counter
|
|
 |
1f016a |
LAST_COUNT = $(shell test ! -e $(BUILD_COUNTER) && echo 0; test -e $(BUILD_COUNTER) && cat $(BUILD_COUNTER))
|
|
 |
1f016a |
COUNT = $(shell expr 1 + $(LAST_COUNT))
|
|
 |
1f016a |
|
|
 |
1f016a |
+SPECVERSION ?= $(COUNT)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
init:
|
|
 |
1f016a |
./autogen.sh
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -144,7 +146,7 @@ srpm-%: export $(PACKAGE)-%.spec
|
|
 |
1f016a |
if [ -e $(BUILD_COUNTER) ]; then \
|
|
 |
1f016a |
echo $(COUNT) > $(BUILD_COUNTER); \
|
|
 |
1f016a |
fi
|
|
 |
1f016a |
- sed -i 's/global\ specversion.*/global\ specversion\ $(COUNT)/' $(PACKAGE).spec
|
|
 |
1f016a |
+ sed -i 's/global\ specversion.*/global\ specversion\ $(SPECVERSION)/' $(PACKAGE).spec
|
|
 |
1f016a |
sed -i 's/global\ commit.*/global\ commit\ $(TAG)/' $(PACKAGE).spec
|
|
 |
1f016a |
case "$(WITH)" in \
|
|
 |
1f016a |
*pre_release*) \
|
|
 |
1f016a |
diff --git a/attrd/commands.c b/attrd/commands.c
|
|
 |
1f016a |
index c48ef1b..12771ee 100644
|
|
 |
1f016a |
--- a/attrd/commands.c
|
|
 |
1f016a |
+++ b/attrd/commands.c
|
|
 |
1f016a |
@@ -202,21 +202,27 @@ attrd_client_message(crm_client_t *client, xmlNode *xml)
|
|
 |
1f016a |
crm_debug("Setting %s to %s", regex, value);
|
|
 |
1f016a |
if (regcomp(r_patt, regex, REG_EXTENDED)) {
|
|
 |
1f016a |
crm_err("Bad regex '%s' for update", regex);
|
|
 |
1f016a |
- regfree(r_patt);
|
|
 |
1f016a |
- free(r_patt);
|
|
 |
1f016a |
- return;
|
|
 |
1f016a |
- }
|
|
 |
1f016a |
|
|
 |
1f016a |
- g_hash_table_iter_init(&aIter, attributes);
|
|
 |
1f016a |
- while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
|
|
 |
1f016a |
- int status = regexec(r_patt, attr, 0, NULL, 0);
|
|
 |
1f016a |
+ } else {
|
|
 |
1f016a |
|
|
 |
1f016a |
- if(status == 0) {
|
|
 |
1f016a |
- crm_trace("Matched %s with %s", attr, regex);
|
|
 |
1f016a |
- crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr);
|
|
 |
1f016a |
- send_attrd_message(NULL, xml);
|
|
 |
1f016a |
+ g_hash_table_iter_init(&aIter, attributes);
|
|
 |
1f016a |
+ while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
|
|
 |
1f016a |
+ int status = regexec(r_patt, attr, 0, NULL, 0);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ if(status == 0) {
|
|
 |
1f016a |
+ crm_trace("Matched %s with %s", attr, regex);
|
|
 |
1f016a |
+ crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr);
|
|
 |
1f016a |
+ send_attrd_message(NULL, xml);
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
}
|
|
 |
1f016a |
}
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ free(key);
|
|
 |
1f016a |
+ free(set);
|
|
 |
1f016a |
+ free(host);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ regfree(r_patt);
|
|
 |
1f016a |
+ free(r_patt);
|
|
 |
1f016a |
return;
|
|
 |
1f016a |
|
|
 |
1f016a |
} else if(host == NULL) {
|
|
 |
1f016a |
diff --git a/cib/callbacks.c b/cib/callbacks.c
|
|
 |
1f016a |
index 610aa0a..9fb2de9 100644
|
|
 |
1f016a |
--- a/cib/callbacks.c
|
|
 |
1f016a |
+++ b/cib/callbacks.c
|
|
 |
1f016a |
@@ -382,6 +382,9 @@ do_local_notify(xmlNode * notify_src, const char *client_id,
|
|
 |
1f016a |
/* send callback to originating child */
|
|
 |
1f016a |
crm_client_t *client_obj = NULL;
|
|
 |
1f016a |
int local_rc = pcmk_ok;
|
|
 |
1f016a |
+ int call_id = 0;
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ crm_element_value_int(notify_src, F_CIB_CALLID, &call_id);
|
|
 |
1f016a |
|
|
 |
1f016a |
if (client_id != NULL) {
|
|
 |
1f016a |
client_obj = crm_client_get_by_id(client_id);
|
|
 |
1f016a |
@@ -389,7 +392,7 @@ do_local_notify(xmlNode * notify_src, const char *client_id,
|
|
 |
1f016a |
|
|
 |
1f016a |
if (client_obj == NULL) {
|
|
 |
1f016a |
local_rc = -ECONNRESET;
|
|
 |
1f016a |
- crm_trace("No client to sent the response to. F_CIB_CLIENTID not set.");
|
|
 |
1f016a |
+ crm_trace("No client to sent response %d to, F_CIB_CLIENTID not set.", call_id);
|
|
 |
1f016a |
|
|
 |
1f016a |
} else {
|
|
 |
1f016a |
int rid = 0;
|
|
 |
1f016a |
@@ -405,13 +408,13 @@ do_local_notify(xmlNode * notify_src, const char *client_id,
|
|
 |
1f016a |
rid, client_obj->name,
|
|
 |
1f016a |
from_peer ? "(originator of delegated request)" : "");
|
|
 |
1f016a |
} else {
|
|
 |
1f016a |
- crm_trace("Sending response to %s %s",
|
|
 |
1f016a |
- client_obj->name, from_peer ? "(originator of delegated request)" : "");
|
|
 |
1f016a |
+ crm_trace("Sending response [call %d] to %s %s",
|
|
 |
1f016a |
+ call_id, client_obj->name, from_peer ? "(originator of delegated request)" : "");
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
} else {
|
|
 |
1f016a |
- crm_trace("Sending an event to %s %s",
|
|
 |
1f016a |
- client_obj->name, from_peer ? "(originator of delegated request)" : "");
|
|
 |
1f016a |
+ crm_trace("Sending event %d to %s %s",
|
|
 |
1f016a |
+ call_id, client_obj->name, from_peer ? "(originator of delegated request)" : "");
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
switch (client_obj->kind) {
|
|
 |
1f016a |
diff --git a/crmd/control.c b/crmd/control.c
|
|
 |
1f016a |
index 99ef659..8989859 100644
|
|
 |
1f016a |
--- a/crmd/control.c
|
|
 |
1f016a |
+++ b/crmd/control.c
|
|
 |
1f016a |
@@ -945,17 +945,23 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
|
|
 |
1f016a |
throttle_load_target = strtof(value, NULL) / 100;
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
+ value = getenv("SBD_WATCHDOG_TIMEOUT");
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ if(value == NULL) {
|
|
 |
1f016a |
+ value = crmd_pref(config_hash, "stonith-watchdog-timeout");
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+
|
|
 |
1f016a |
value = crmd_pref(config_hash, "stonith-watchdog-timeout");
|
|
 |
1f016a |
- if(crm_get_msec(value) > 0 && daemon_option("watchdog") == NULL) {
|
|
 |
1f016a |
+ if(crm_get_msec(value) > 0 && !daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
do_crm_log_always(LOG_EMERG, "Shutting down pacemaker, no watchdog device configured");
|
|
 |
1f016a |
crmd_exit(DAEMON_RESPAWN_STOP);
|
|
 |
1f016a |
|
|
 |
1f016a |
- } else if(crm_get_msec(value) <= 0 && daemon_option("watchdog")) {
|
|
 |
1f016a |
+ } else if(crm_get_msec(value) <= 0 && daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
crm_warn("Watchdog enabled but no stonith-watchdog-timeout configured");
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
value = crmd_pref(config_hash, "no-quorum-policy");
|
|
 |
1f016a |
- if (safe_str_eq(value, "suicide") && daemon_option("watchdog")) {
|
|
 |
1f016a |
+ if (safe_str_eq(value, "suicide") && daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
no_quorum_suicide_escalation = TRUE;
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/crmd/election.c b/crmd/election.c
|
|
 |
1f016a |
index e0bd6c4..a4f2e99 100644
|
|
 |
1f016a |
--- a/crmd/election.c
|
|
 |
1f016a |
+++ b/crmd/election.c
|
|
 |
1f016a |
@@ -208,6 +208,9 @@ do_dc_takeover(long long action,
|
|
 |
1f016a |
fsa_register_cib_callback(rc, FALSE, NULL, feature_update_callback);
|
|
 |
1f016a |
|
|
 |
1f016a |
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
|
|
 |
1f016a |
+ XML_ATTR_HAVE_WATCHDOG, daemon_option("watchdog"), FALSE, NULL, NULL);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
|
|
 |
1f016a |
"dc-version", VERSION "-" BUILD_VERSION, FALSE, NULL, NULL);
|
|
 |
1f016a |
|
|
 |
1f016a |
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
|
|
 |
1f016a |
diff --git a/crmd/pengine.c b/crmd/pengine.c
|
|
 |
1f016a |
index ab426be..2f7513f 100644
|
|
 |
1f016a |
--- a/crmd/pengine.c
|
|
 |
1f016a |
+++ b/crmd/pengine.c
|
|
 |
1f016a |
@@ -237,6 +237,48 @@ do_pe_invoke(long long action,
|
|
 |
1f016a |
fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
+static void
|
|
 |
1f016a |
+force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
|
|
 |
1f016a |
+{
|
|
 |
1f016a |
+ int max = 0;
|
|
 |
1f016a |
+ int lpc = 0;
|
|
 |
1f016a |
+ int xpath_max = 1024;
|
|
 |
1f016a |
+ char *xpath_string = NULL;
|
|
 |
1f016a |
+ xmlXPathObjectPtr xpathObj = NULL;
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ xpath_string = calloc(1, xpath_max);
|
|
 |
1f016a |
+ lpc = snprintf(xpath_string, xpath_max, "%.128s//%s//nvpair[@name='%.128s']",
|
|
 |
1f016a |
+ get_object_path(XML_CIB_TAG_CRMCONFIG), XML_CIB_TAG_PROPSET, attr_name);
|
|
 |
1f016a |
+ CRM_LOG_ASSERT(lpc > 0);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ xpathObj = xpath_search(xml, xpath_string);
|
|
 |
1f016a |
+ max = numXpathResults(xpathObj);
|
|
 |
1f016a |
+ free(xpath_string);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ for (lpc = 0; lpc < max; lpc++) {
|
|
 |
1f016a |
+ xmlNode *match = getXpathResult(xpathObj, lpc);
|
|
 |
1f016a |
+ crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
|
|
 |
1f016a |
+ crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ if(max == 0) {
|
|
 |
1f016a |
+ char *attr_id = crm_concat(CIB_OPTIONS_FIRST, attr_name, '-');
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ crm_trace("Creating %s/%s = %s", attr_id, attr_name, attr_value);
|
|
 |
1f016a |
+ xml = create_xml_node(xml, XML_CIB_TAG_CRMCONFIG);
|
|
 |
1f016a |
+ xml = create_xml_node(xml, XML_CIB_TAG_PROPSET);
|
|
 |
1f016a |
+ crm_xml_add(xml, XML_ATTR_ID, CIB_OPTIONS_FIRST);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ xml = create_xml_node(xml, XML_CIB_TAG_NVPAIR);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ crm_xml_add(xml, XML_ATTR_ID, attr_id);
|
|
 |
1f016a |
+ crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
|
|
 |
1f016a |
+ crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ free(attr_id);
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+}
|
|
 |
1f016a |
+
|
|
 |
1f016a |
void
|
|
 |
1f016a |
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
|
|
 |
1f016a |
{
|
|
 |
1f016a |
@@ -279,6 +321,7 @@ do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
|
|
 |
1f016a |
|
|
 |
1f016a |
crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
|
|
 |
1f016a |
crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
|
|
 |
1f016a |
+ force_local_option(output, XML_ATTR_HAVE_WATCHDOG, daemon_option("watchdog"));
|
|
 |
1f016a |
|
|
 |
1f016a |
if (ever_had_quorum && crm_have_quorum == FALSE) {
|
|
 |
1f016a |
crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
|
|
 |
1f016a |
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
|
|
 |
1f016a |
index f3dedeb..2f658ee 100644
|
|
 |
1f016a |
--- a/crmd/remote_lrmd_ra.c
|
|
 |
1f016a |
+++ b/crmd/remote_lrmd_ra.c
|
|
 |
1f016a |
@@ -140,8 +140,6 @@ recurring_helper(gpointer data)
|
|
 |
1f016a |
|
|
 |
1f016a |
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
|
|
 |
1f016a |
|
|
 |
1f016a |
- cmd->call_id = generate_callid();
|
|
 |
1f016a |
-
|
|
 |
1f016a |
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
|
|
 |
1f016a |
mainloop_set_trigger(ra_data->work);
|
|
 |
1f016a |
}
|
|
 |
1f016a |
@@ -177,6 +175,24 @@ report_remote_ra_result(remote_ra_cmd_t * cmd)
|
|
 |
1f016a |
op.interval = cmd->interval;
|
|
 |
1f016a |
op.rc = cmd->rc;
|
|
 |
1f016a |
op.op_status = cmd->op_status;
|
|
 |
1f016a |
+ op.t_run = cmd->start_time;
|
|
 |
1f016a |
+ op.t_rcchange = cmd->start_time;
|
|
 |
1f016a |
+ if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
|
|
 |
1f016a |
+ op.t_rcchange = time(NULL);
|
|
 |
1f016a |
+ /* This edge case will likely never ever occur, but if it does the
|
|
 |
1f016a |
+ * result is that a failure will not be processed correctly. This is only
|
|
 |
1f016a |
+ * remotely possible because we are able to detect a connection resource's tcp
|
|
 |
1f016a |
+ * connection has failed at any moment after start has completed. The actual
|
|
 |
1f016a |
+ * recurring operation is just a connectivity ping.
|
|
 |
1f016a |
+ *
|
|
 |
1f016a |
+ * basically, we are not guaranteed that the first successful monitor op and
|
|
 |
1f016a |
+ * a subsequent failed monitor op will not occur in the same timestamp. We have to
|
|
 |
1f016a |
+ * make it look like the operations occurred at separate times though. */
|
|
 |
1f016a |
+ if (op.t_rcchange == op.t_run) {
|
|
 |
1f016a |
+ op.t_rcchange++;
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+
|
|
 |
1f016a |
if (cmd->params) {
|
|
 |
1f016a |
lrmd_key_value_t *tmp;
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
|
|
 |
1f016a |
index 66ed1da..69c22e3 100644
|
|
 |
1f016a |
--- a/crmd/te_utils.c
|
|
 |
1f016a |
+++ b/crmd/te_utils.c
|
|
 |
1f016a |
@@ -126,19 +126,19 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
|
|
 |
1f016a |
|
|
 |
1f016a |
if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
|
|
 |
1f016a |
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
|
 |
1f016a |
- st_event->target, st_event->executioner, st_event->origin);
|
|
 |
1f016a |
+ st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
|
|
 |
1f016a |
/* TODO: Hook up st_event->device */
|
|
 |
1f016a |
return;
|
|
 |
1f016a |
|
|
 |
1f016a |
} else if (safe_str_eq("on", st_event->action)) {
|
|
 |
1f016a |
crm_err("Unfencing of %s by %s failed: %s (%d)",
|
|
 |
1f016a |
- st_event->target, st_event->executioner,
|
|
 |
1f016a |
+ st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
|
|
 |
1f016a |
pcmk_strerror(st_event->result), st_event->result);
|
|
 |
1f016a |
return;
|
|
 |
1f016a |
|
|
 |
1f016a |
} else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
|
|
 |
1f016a |
crm_crit("We were allegedly just fenced by %s for %s!",
|
|
 |
1f016a |
- st_event->executioner, st_event->origin); /* Dumps blackbox if enabled */
|
|
 |
1f016a |
+ st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */
|
|
 |
1f016a |
|
|
 |
1f016a |
qb_log_fini(); /* Try to get the above log message to disk - somehow */
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/cts/CIB.py b/cts/CIB.py
|
|
 |
1f016a |
index 6ce38e9..d26efdb 100644
|
|
 |
1f016a |
--- a/cts/CIB.py
|
|
 |
1f016a |
+++ b/cts/CIB.py
|
|
 |
1f016a |
@@ -177,6 +177,8 @@ class CIB11(ConfigBase):
|
|
 |
1f016a |
for node in self.CM.Env["nodes"]:
|
|
 |
1f016a |
ftype = self.CM.Env.RandomGen.choice(["levels-and", "levels-or ", "broadcast "])
|
|
 |
1f016a |
self.CM.log(" - Using %s fencing for node: %s" % (ftype, node))
|
|
 |
1f016a |
+ # for baremetal remote node tests
|
|
 |
1f016a |
+ stt_nodes.append("remote_%s" % node)
|
|
 |
1f016a |
if ftype == "levels-and":
|
|
 |
1f016a |
stl.level(1, node, "FencingPass,Fencing")
|
|
 |
1f016a |
stt_nodes.append(node)
|
|
 |
1f016a |
diff --git a/cts/CTStests.py b/cts/CTStests.py
|
|
 |
1f016a |
index cd5b7ce..d2b7668 100644
|
|
 |
1f016a |
--- a/cts/CTStests.py
|
|
 |
1f016a |
+++ b/cts/CTStests.py
|
|
 |
1f016a |
@@ -453,8 +453,8 @@ class StonithdTest(CTSTest):
|
|
 |
1f016a |
is_dc = self.CM.is_node_dc(node)
|
|
 |
1f016a |
|
|
 |
1f016a |
watchpats = []
|
|
 |
1f016a |
- watchpats.append("Operation .* for host '%s' with device .* returned: 0" % node)
|
|
 |
1f016a |
- watchpats.append("tengine_stonith_notify:.*Peer %s was terminated .*: OK" % node)
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:FenceOpOK"] % node)
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:NodeFenced"] % node)
|
|
 |
1f016a |
|
|
 |
1f016a |
if self.Env["at-boot"] == 0:
|
|
 |
1f016a |
self.debug("Expecting %s to stay down" % node)
|
|
 |
1f016a |
@@ -2634,11 +2634,11 @@ AllTestClasses.append(RemoteLXC)
|
|
 |
1f016a |
|
|
 |
1f016a |
|
|
 |
1f016a |
###################################################################
|
|
 |
1f016a |
-class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
+class RemoteDriver(CTSTest):
|
|
 |
1f016a |
###################################################################
|
|
 |
1f016a |
def __init__(self, cm):
|
|
 |
1f016a |
CTSTest.__init__(self,cm)
|
|
 |
1f016a |
- self.name = "RemoteBaremetal"
|
|
 |
1f016a |
+ self.name = "RemoteDriver"
|
|
 |
1f016a |
self.is_docker_unsafe = 1
|
|
 |
1f016a |
self.start = StartTest(cm)
|
|
 |
1f016a |
self.startall = SimulStartLite(cm)
|
|
 |
1f016a |
@@ -2647,9 +2647,8 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
self.failed = 0
|
|
 |
1f016a |
self.fail_string = ""
|
|
 |
1f016a |
self.remote_node_added = 0
|
|
 |
1f016a |
- self.remote_node = "remote1"
|
|
 |
1f016a |
self.remote_rsc_added = 0
|
|
 |
1f016a |
- self.remote_rsc = "remote1-rsc"
|
|
 |
1f016a |
+ self.remote_rsc = "remote-rsc"
|
|
 |
1f016a |
self.cib_cmd = """cibadmin -C -o %s -X '%s' """
|
|
 |
1f016a |
|
|
 |
1f016a |
def del_rsc(self, node, rsc):
|
|
 |
1f016a |
@@ -2679,10 +2678,11 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
|
|
 |
1f016a |
def add_primitive_rsc(self, node):
|
|
 |
1f016a |
rsc_xml = """
|
|
 |
1f016a |
-<primitive class="ocf" id="%s" provider="pacemaker" type="Dummy">
|
|
 |
1f016a |
+<primitive class="ocf" id="%s" provider="heartbeat" type="Dummy">
|
|
 |
1f016a |
<operations>
|
|
 |
1f016a |
- <op id="remote1-rsc-monitor-interval-10s" interval="10s" name="monitor"/>
|
|
 |
1f016a |
+ <op id="remote-rsc-monitor-interval-10s" interval="10s" name="monitor"/>
|
|
 |
1f016a |
</operations>
|
|
 |
1f016a |
+ <meta_attributes id="remote-meta_attributes"/>
|
|
 |
1f016a |
</primitive>""" % (self.remote_rsc)
|
|
 |
1f016a |
self.add_rsc(node, rsc_xml)
|
|
 |
1f016a |
if self.failed == 0:
|
|
 |
1f016a |
@@ -2691,21 +2691,38 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
def add_connection_rsc(self, node):
|
|
 |
1f016a |
rsc_xml = """
|
|
 |
1f016a |
<primitive class="ocf" id="%s" provider="pacemaker" type="remote">
|
|
 |
1f016a |
- <instance_attributes id="remote1-instance_attributes"/>
|
|
 |
1f016a |
- <instance_attributes id="remote1-instance_attributes">
|
|
 |
1f016a |
- <nvpair id="remote1-instance_attributes-server" name="server" value="%s"/>
|
|
 |
1f016a |
+ <instance_attributes id="remote-instance_attributes"/>
|
|
 |
1f016a |
+ <instance_attributes id="remote-instance_attributes">
|
|
 |
1f016a |
+ <nvpair id="remote-instance_attributes-server" name="server" value="%s"/>
|
|
 |
1f016a |
</instance_attributes>
|
|
 |
1f016a |
<operations>
|
|
 |
1f016a |
- <op id="remote1-monitor-interval-60s" interval="60s" name="monitor"/>
|
|
 |
1f016a |
- <op id="remote1-name-start-interval-0-timeout-60" interval="0" name="start" timeout="60"/>
|
|
 |
1f016a |
+ <op id="remote-monitor-interval-60s" interval="60s" name="monitor"/>
|
|
 |
1f016a |
+ <op id="remote-name-start-interval-0-timeout-120" interval="0" name="start" timeout="120"/>
|
|
 |
1f016a |
</operations>
|
|
 |
1f016a |
- <meta_attributes id="remote1-meta_attributes"/>
|
|
 |
1f016a |
</primitive>""" % (self.remote_node, node)
|
|
 |
1f016a |
self.add_rsc(node, rsc_xml)
|
|
 |
1f016a |
if self.failed == 0:
|
|
 |
1f016a |
self.remote_node_added = 1
|
|
 |
1f016a |
|
|
 |
1f016a |
- def step1_start_metal(self, node):
|
|
 |
1f016a |
+ def stop_pcmk_remote(self, node):
|
|
 |
1f016a |
+ # disable pcmk remote
|
|
 |
1f016a |
+ for i in range(10):
|
|
 |
1f016a |
+ rc = self.rsh(node, "service pacemaker_remote stop")
|
|
 |
1f016a |
+ if rc != 0:
|
|
 |
1f016a |
+ time.sleep(6)
|
|
 |
1f016a |
+ else:
|
|
 |
1f016a |
+ break
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def start_pcmk_remote(self, node):
|
|
 |
1f016a |
+ for i in range(10):
|
|
 |
1f016a |
+ rc = self.rsh(node, "service pacemaker_remote start")
|
|
 |
1f016a |
+ if rc != 0:
|
|
 |
1f016a |
+ time.sleep(6)
|
|
 |
1f016a |
+ else:
|
|
 |
1f016a |
+ self.pcmk_started = 1
|
|
 |
1f016a |
+ break
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def start_metal(self, node):
|
|
 |
1f016a |
pcmk_started = 0
|
|
 |
1f016a |
|
|
 |
1f016a |
# make sure the resource doesn't already exist for some reason
|
|
 |
1f016a |
@@ -2717,13 +2734,7 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
self.fail_string = "Failed to shutdown cluster node %s" % (node)
|
|
 |
1f016a |
return
|
|
 |
1f016a |
|
|
 |
1f016a |
- for i in range(10):
|
|
 |
1f016a |
- rc = self.rsh(node, "service pacemaker_remote start")
|
|
 |
1f016a |
- if rc != 0:
|
|
 |
1f016a |
- time.sleep(6)
|
|
 |
1f016a |
- else:
|
|
 |
1f016a |
- self.pcmk_started = 1
|
|
 |
1f016a |
- break
|
|
 |
1f016a |
+ self.start_pcmk_remote(node)
|
|
 |
1f016a |
|
|
 |
1f016a |
if self.pcmk_started == 0:
|
|
 |
1f016a |
self.failed = 1
|
|
 |
1f016a |
@@ -2735,6 +2746,7 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
watch = self.create_watch(pats, 120)
|
|
 |
1f016a |
watch.setwatch()
|
|
 |
1f016a |
pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:DC_IDLE"])
|
|
 |
1f016a |
|
|
 |
1f016a |
self.add_connection_rsc(node)
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -2745,7 +2757,112 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
self.failed = 1
|
|
 |
1f016a |
|
|
 |
1f016a |
- def step2_add_rsc(self, node):
|
|
 |
1f016a |
+ def migrate_connection(self, node):
|
|
 |
1f016a |
+ if self.failed == 1:
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ pats = [ ]
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_to"))
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_from"))
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:DC_IDLE"])
|
|
 |
1f016a |
+ watch = self.create_watch(pats, 120)
|
|
 |
1f016a |
+ watch.setwatch()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ (rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None)
|
|
 |
1f016a |
+ if rc != 0:
|
|
 |
1f016a |
+ self.fail_string = "failed to move remote node connection resource"
|
|
 |
1f016a |
+ self.logger.log(self.fail_string)
|
|
 |
1f016a |
+ self.failed = 1
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.set_timer("remoteMetalMigrate")
|
|
 |
1f016a |
+ watch.lookforall()
|
|
 |
1f016a |
+ self.log_timer("remoteMetalMigrate")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ if watch.unmatched:
|
|
 |
1f016a |
+ self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
+ self.logger.log(self.fail_string)
|
|
 |
1f016a |
+ self.failed = 1
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def fail_rsc(self, node):
|
|
 |
1f016a |
+ if self.failed == 1:
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ watchpats = [ ]
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "stop", self.remote_node))
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:DC_IDLE"])
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ watch = self.create_watch(watchpats, 120)
|
|
 |
1f016a |
+ watch.setwatch()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("causing dummy rsc to fail.")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.set_timer("remoteRscFail")
|
|
 |
1f016a |
+ watch.lookforall()
|
|
 |
1f016a |
+ self.log_timer("remoteRscFail")
|
|
 |
1f016a |
+ if watch.unmatched:
|
|
 |
1f016a |
+ self.fail_string = "Unmatched patterns during rsc fail: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
+ self.logger.log(self.fail_string)
|
|
 |
1f016a |
+ self.failed = 1
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def fail_connection(self, node):
|
|
 |
1f016a |
+ if self.failed == 1:
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ watchpats = [ ]
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node)
|
|
 |
1f016a |
+ watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ watch = self.create_watch(watchpats, 120)
|
|
 |
1f016a |
+ watch.setwatch()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ # force stop the pcmk remote daemon. this will result in fencing
|
|
 |
1f016a |
+ self.debug("Force stopped active remote node")
|
|
 |
1f016a |
+ self.stop_pcmk_remote(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for remote node to be fenced.")
|
|
 |
1f016a |
+ self.set_timer("remoteMetalFence")
|
|
 |
1f016a |
+ watch.lookforall()
|
|
 |
1f016a |
+ self.log_timer("remoteMetalFence")
|
|
 |
1f016a |
+ if watch.unmatched:
|
|
 |
1f016a |
+ self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
+ self.logger.log(self.fail_string)
|
|
 |
1f016a |
+ self.failed = 1
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for the remote node to come back up")
|
|
 |
1f016a |
+ self.CM.ns.WaitForNodeToComeUp(node, 120);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ pats = [ ]
|
|
 |
1f016a |
+ watch = self.create_watch(pats, 120)
|
|
 |
1f016a |
+ watch.setwatch()
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start"))
|
|
 |
1f016a |
+ if self.remote_rsc_added == 1:
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "monitor"))
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ # start the remote node again watch it integrate back into cluster.
|
|
 |
1f016a |
+ self.start_pcmk_remote(node)
|
|
 |
1f016a |
+ if self.pcmk_started == 0:
|
|
 |
1f016a |
+ self.failed = 1
|
|
 |
1f016a |
+ self.fail_string = "Failed to start pacemaker_remote on node %s" % (node)
|
|
 |
1f016a |
+ self.logger.log(self.fail_string)
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for remote node to rejoin cluster after being fenced.")
|
|
 |
1f016a |
+ self.set_timer("remoteMetalRestart")
|
|
 |
1f016a |
+ watch.lookforall()
|
|
 |
1f016a |
+ self.log_timer("remoteMetalRestart")
|
|
 |
1f016a |
+ if watch.unmatched:
|
|
 |
1f016a |
+ self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
+ self.failed = 1
|
|
 |
1f016a |
+ self.logger.log(self.fail_string)
|
|
 |
1f016a |
+ return
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def add_dummy_rsc(self, node):
|
|
 |
1f016a |
if self.failed == 1:
|
|
 |
1f016a |
return
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -2753,33 +2870,19 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
pats = [ ]
|
|
 |
1f016a |
watch = self.create_watch(pats, 120)
|
|
 |
1f016a |
watch.setwatch()
|
|
 |
1f016a |
- pats.append("process_lrm_event:.*Operation %s_start_0.*node=%s, .*confirmed.*true" % (self.remote_rsc, self.remote_node))
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node))
|
|
 |
1f016a |
+ pats.append(self.templates["Pat:DC_IDLE"])
|
|
 |
1f016a |
|
|
 |
1f016a |
# Add a resource that must live on remote-node
|
|
 |
1f016a |
self.add_primitive_rsc(node)
|
|
 |
1f016a |
- # this crm_resource command actually occurs on the remote node
|
|
 |
1f016a |
- # which verifies that the ipc proxy works
|
|
 |
1f016a |
- time.sleep(1)
|
|
 |
1f016a |
|
|
 |
1f016a |
- (rc, lines) = self.rsh(node, "crm_resource -W -r remote1-rsc --quiet", None)
|
|
 |
1f016a |
+ # force that rsc to prefer the remote node.
|
|
 |
1f016a |
+ (rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None)
|
|
 |
1f016a |
if rc != 0:
|
|
 |
1f016a |
- self.fail_string = "Failed to get location of resource remote1-rsc"
|
|
 |
1f016a |
+ self.fail_string = "Failed to place remote resource on remote node."
|
|
 |
1f016a |
self.failed = 1
|
|
 |
1f016a |
return
|
|
 |
1f016a |
|
|
 |
1f016a |
- find = 0
|
|
 |
1f016a |
- for line in lines:
|
|
 |
1f016a |
- if self.remote_node in line.split():
|
|
 |
1f016a |
- find = 1
|
|
 |
1f016a |
- break
|
|
 |
1f016a |
-
|
|
 |
1f016a |
- if find == 0:
|
|
 |
1f016a |
- rc = self.rsh(node, "crm_resource -M -r remote1-rsc -N %s" % (self.remote_node))
|
|
 |
1f016a |
- if rc != 0:
|
|
 |
1f016a |
- self.fail_string = "Failed to place primitive on remote-node"
|
|
 |
1f016a |
- self.failed = 1
|
|
 |
1f016a |
- return
|
|
 |
1f016a |
-
|
|
 |
1f016a |
self.set_timer("remoteMetalRsc")
|
|
 |
1f016a |
watch.lookforall()
|
|
 |
1f016a |
self.log_timer("remoteMetalRsc")
|
|
 |
1f016a |
@@ -2787,7 +2890,7 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
self.failed = 1
|
|
 |
1f016a |
|
|
 |
1f016a |
- def step3_test_attributes(self, node):
|
|
 |
1f016a |
+ def test_attributes(self, node):
|
|
 |
1f016a |
if self.failed == 1:
|
|
 |
1f016a |
return
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -2827,9 +2930,10 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
|
|
 |
1f016a |
self.set_timer("remoteMetalCleanup")
|
|
 |
1f016a |
if self.remote_rsc_added == 1:
|
|
 |
1f016a |
- self.rsh(node, "crm_resource -U -r remote1-rsc -N %s" % (self.remote_node))
|
|
 |
1f016a |
+ self.rsh(node, "crm_resource -U -r %s -N %s" % (self.remote_rsc, self.remote_node))
|
|
 |
1f016a |
self.del_rsc(node, self.remote_rsc)
|
|
 |
1f016a |
if self.remote_node_added == 1:
|
|
 |
1f016a |
+ self.rsh(node, "crm_resource -U -r %s" % (self.remote_node))
|
|
 |
1f016a |
self.del_rsc(node, self.remote_node)
|
|
 |
1f016a |
watch.lookforall()
|
|
 |
1f016a |
self.log_timer("remoteMetalCleanup")
|
|
 |
1f016a |
@@ -2838,15 +2942,11 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
|
|
 |
1f016a |
self.failed = 1
|
|
 |
1f016a |
|
|
 |
1f016a |
- # disable pcmk remote
|
|
 |
1f016a |
- for i in range(10):
|
|
 |
1f016a |
- rc = self.rsh(node, "service pacemaker_remote stop")
|
|
 |
1f016a |
- if rc != 0:
|
|
 |
1f016a |
- time.sleep(6)
|
|
 |
1f016a |
- else:
|
|
 |
1f016a |
- break
|
|
 |
1f016a |
+ self.stop_pcmk_remote(node)
|
|
 |
1f016a |
|
|
 |
1f016a |
- def setup_env(self):
|
|
 |
1f016a |
+ def setup_env(self, node):
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.remote_node = "remote_%s" % (node)
|
|
 |
1f016a |
sync_key = 0
|
|
 |
1f016a |
|
|
 |
1f016a |
# we are assuming if all nodes have a key, that it is
|
|
 |
1f016a |
@@ -2887,10 +2987,10 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
if not ret:
|
|
 |
1f016a |
return self.failure("Setup failed, start all nodes failed.")
|
|
 |
1f016a |
|
|
 |
1f016a |
- self.setup_env()
|
|
 |
1f016a |
- self.step1_start_metal(node)
|
|
 |
1f016a |
- self.step2_add_rsc(node)
|
|
 |
1f016a |
- self.step3_test_attributes(node)
|
|
 |
1f016a |
+ self.setup_env(node)
|
|
 |
1f016a |
+ self.start_metal(node)
|
|
 |
1f016a |
+ self.add_dummy_rsc(node)
|
|
 |
1f016a |
+ self.test_attributes(node)
|
|
 |
1f016a |
self.cleanup_metal(node)
|
|
 |
1f016a |
|
|
 |
1f016a |
self.debug("Waiting for the cluster to recover")
|
|
 |
1f016a |
@@ -2902,11 +3002,201 @@ class RemoteBaremetal(CTSTest):
|
|
 |
1f016a |
|
|
 |
1f016a |
def errorstoignore(self):
|
|
 |
1f016a |
'''Return list of errors which should be ignored'''
|
|
 |
1f016a |
- return [ """is running on remote1 which isn't allowed""",
|
|
 |
1f016a |
+ return [ """is running on remote.*which isn't allowed""",
|
|
 |
1f016a |
"""Connection terminated""",
|
|
 |
1f016a |
"""Failed to send remote""",
|
|
 |
1f016a |
]
|
|
 |
1f016a |
|
|
 |
1f016a |
-AllTestClasses.append(RemoteBaremetal)
|
|
 |
1f016a |
+# Remote driver is called by other tests.
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+class RemoteBasic(CTSTest):
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+ def __init__(self, cm):
|
|
 |
1f016a |
+ CTSTest.__init__(self,cm)
|
|
 |
1f016a |
+ self.name = "RemoteBasic"
|
|
 |
1f016a |
+ self.start = StartTest(cm)
|
|
 |
1f016a |
+ self.startall = SimulStartLite(cm)
|
|
 |
1f016a |
+ self.driver = RemoteDriver(cm)
|
|
 |
1f016a |
+ self.is_docker_unsafe = 1
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def __call__(self, node):
|
|
 |
1f016a |
+ '''Perform the 'RemoteBaremetal' test. '''
|
|
 |
1f016a |
+ self.incr("calls")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ ret = self.startall(None)
|
|
 |
1f016a |
+ if not ret:
|
|
 |
1f016a |
+ return self.failure("Setup failed, start all nodes failed.")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.driver.setup_env(node)
|
|
 |
1f016a |
+ self.driver.start_metal(node)
|
|
 |
1f016a |
+ self.driver.add_dummy_rsc(node)
|
|
 |
1f016a |
+ self.driver.test_attributes(node)
|
|
 |
1f016a |
+ self.driver.cleanup_metal(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for the cluster to recover")
|
|
 |
1f016a |
+ self.CM.cluster_stable()
|
|
 |
1f016a |
+ if self.driver.failed == 1:
|
|
 |
1f016a |
+ return self.failure(self.driver.fail_string)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ return self.success()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def is_applicable(self):
|
|
 |
1f016a |
+ return self.driver.is_applicable()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def errorstoignore(self):
|
|
 |
1f016a |
+ return self.driver.errorstoignore()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+AllTestClasses.append(RemoteBasic)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+class RemoteStonithd(CTSTest):
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+ def __init__(self, cm):
|
|
 |
1f016a |
+ CTSTest.__init__(self,cm)
|
|
 |
1f016a |
+ self.name = "RemoteStonithd"
|
|
 |
1f016a |
+ self.start = StartTest(cm)
|
|
 |
1f016a |
+ self.startall = SimulStartLite(cm)
|
|
 |
1f016a |
+ self.driver = RemoteDriver(cm)
|
|
 |
1f016a |
+ self.is_docker_unsafe = 1
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def __call__(self, node):
|
|
 |
1f016a |
+ '''Perform the 'RemoteStonithd' test. '''
|
|
 |
1f016a |
+ self.incr("calls")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ ret = self.startall(None)
|
|
 |
1f016a |
+ if not ret:
|
|
 |
1f016a |
+ return self.failure("Setup failed, start all nodes failed.")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.driver.setup_env(node)
|
|
 |
1f016a |
+ self.driver.start_metal(node)
|
|
 |
1f016a |
+ self.driver.add_dummy_rsc(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.driver.fail_connection(node)
|
|
 |
1f016a |
+ self.driver.cleanup_metal(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for the cluster to recover")
|
|
 |
1f016a |
+ self.CM.cluster_stable()
|
|
 |
1f016a |
+ if self.driver.failed == 1:
|
|
 |
1f016a |
+ return self.failure(self.driver.fail_string)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ return self.success()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def is_applicable(self):
|
|
 |
1f016a |
+ if not self.driver.is_applicable():
|
|
 |
1f016a |
+ return False
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ if self.Env.has_key("DoFencing"):
|
|
 |
1f016a |
+ return self.Env["DoFencing"]
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ return True
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def errorstoignore(self):
|
|
 |
1f016a |
+ ignore_pats = [
|
|
 |
1f016a |
+ """Unexpected disconnect on remote-node""",
|
|
 |
1f016a |
+ """error: process_lrm_event: Operation remote_.*_monitor""",
|
|
 |
1f016a |
+ """LogActions: Recover remote_""",
|
|
 |
1f016a |
+ """Calculated Transition .* /var/lib/pacemaker/pengine/pe-error""",
|
|
 |
1f016a |
+ """error: native_create_actions: Resource .*ocf::.* is active on 2 nodes attempting recovery""",
|
|
 |
1f016a |
+ ]
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ ignore_pats.extend(self.driver.errorstoignore())
|
|
 |
1f016a |
+ return ignore_pats
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+AllTestClasses.append(RemoteStonithd)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+class RemoteMigrate(CTSTest):
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+ def __init__(self, cm):
|
|
 |
1f016a |
+ CTSTest.__init__(self,cm)
|
|
 |
1f016a |
+ self.name = "RemoteMigrate"
|
|
 |
1f016a |
+ self.start = StartTest(cm)
|
|
 |
1f016a |
+ self.startall = SimulStartLite(cm)
|
|
 |
1f016a |
+ self.driver = RemoteDriver(cm)
|
|
 |
1f016a |
+ self.is_docker_unsafe = 1
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def __call__(self, node):
|
|
 |
1f016a |
+ '''Perform the 'RemoteMigrate' test. '''
|
|
 |
1f016a |
+ self.incr("calls")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ ret = self.startall(None)
|
|
 |
1f016a |
+ if not ret:
|
|
 |
1f016a |
+ return self.failure("Setup failed, start all nodes failed.")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.driver.setup_env(node)
|
|
 |
1f016a |
+ self.driver.start_metal(node)
|
|
 |
1f016a |
+ self.driver.add_dummy_rsc(node)
|
|
 |
1f016a |
+ self.driver.migrate_connection(node)
|
|
 |
1f016a |
+ self.driver.cleanup_metal(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for the cluster to recover")
|
|
 |
1f016a |
+ self.CM.cluster_stable()
|
|
 |
1f016a |
+ if self.driver.failed == 1:
|
|
 |
1f016a |
+ return self.failure(self.driver.fail_string)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ return self.success()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def is_applicable(self):
|
|
 |
1f016a |
+ return self.driver.is_applicable()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def errorstoignore(self):
|
|
 |
1f016a |
+ return self.driver.errorstoignore()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+AllTestClasses.append(RemoteMigrate)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+class RemoteRscFailure(CTSTest):
|
|
 |
1f016a |
+###################################################################
|
|
 |
1f016a |
+ def __init__(self, cm):
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ # fail a rsc on a remote node, verify recovery.
|
|
 |
1f016a |
+ CTSTest.__init__(self,cm)
|
|
 |
1f016a |
+ self.name = "RemoteRscFailure"
|
|
 |
1f016a |
+ self.start = StartTest(cm)
|
|
 |
1f016a |
+ self.startall = SimulStartLite(cm)
|
|
 |
1f016a |
+ self.driver = RemoteDriver(cm)
|
|
 |
1f016a |
+ self.is_docker_unsafe = 1
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def __call__(self, node):
|
|
 |
1f016a |
+ '''Perform the 'RemoteRscFailure' test. '''
|
|
 |
1f016a |
+ self.incr("calls")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ ret = self.startall(None)
|
|
 |
1f016a |
+ if not ret:
|
|
 |
1f016a |
+ return self.failure("Setup failed, start all nodes failed.")
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.driver.setup_env(node)
|
|
 |
1f016a |
+ self.driver.start_metal(node)
|
|
 |
1f016a |
+ self.driver.add_dummy_rsc(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ # This is an important step. We are migrating the connection
|
|
 |
1f016a |
+ # before failing the resource. This verifies that the migration
|
|
 |
1f016a |
+ # has properly maintained control over the remote-node.
|
|
 |
1f016a |
+ self.driver.migrate_connection(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.driver.fail_rsc(node)
|
|
 |
1f016a |
+ self.driver.cleanup_metal(node)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ self.debug("Waiting for the cluster to recover")
|
|
 |
1f016a |
+ self.CM.cluster_stable()
|
|
 |
1f016a |
+ if self.driver.failed == 1:
|
|
 |
1f016a |
+ return self.failure(self.driver.fail_string)
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ return self.success()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def is_applicable(self):
|
|
 |
1f016a |
+ return self.driver.is_applicable()
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ def errorstoignore(self):
|
|
 |
1f016a |
+ ignore_pats = [
|
|
 |
1f016a |
+ """LogActions: Recover remote-rsc""",
|
|
 |
1f016a |
+ ]
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ ignore_pats.extend(self.driver.errorstoignore())
|
|
 |
1f016a |
+ return ignore_pats
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+AllTestClasses.append(RemoteRscFailure)
|
|
 |
1f016a |
|
|
 |
1f016a |
# vim:ts=4:sw=4:et:
|
|
 |
1f016a |
diff --git a/cts/patterns.py b/cts/patterns.py
|
|
 |
1f016a |
index 8d34e1c..e734f40 100644
|
|
 |
1f016a |
--- a/cts/patterns.py
|
|
 |
1f016a |
+++ b/cts/patterns.py
|
|
 |
1f016a |
@@ -56,6 +56,9 @@ class BasePatterns:
|
|
 |
1f016a |
"Pat:Fencing_ok" : "stonith.*remote_op_done:.*Operation .* of %s by .*: OK",
|
|
 |
1f016a |
|
|
 |
1f016a |
"Pat:RscOpOK" : "process_lrm_event:.*Operation %s_%s.*ok.*confirmed",
|
|
 |
1f016a |
+ "Pat:RscRemoteOpOK" : "process_lrm_event:.*Operation %s_%s.*ok.*node=%s, .*confirmed.*true",
|
|
 |
1f016a |
+ "Pat:NodeFenced" : "tengine_stonith_notify:.*Peer %s was terminated .*: OK",
|
|
 |
1f016a |
+ "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0",
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
def get_component(self, key):
|
|
 |
1f016a |
diff --git a/cts/watcher.py b/cts/watcher.py
|
|
 |
1f016a |
index 5e6ee43..41e4100 100644
|
|
 |
1f016a |
--- a/cts/watcher.py
|
|
 |
1f016a |
+++ b/cts/watcher.py
|
|
 |
1f016a |
@@ -31,7 +31,8 @@ from cts.remote import *
|
|
 |
1f016a |
from cts.logging import *
|
|
 |
1f016a |
|
|
 |
1f016a |
has_log_watcher = {}
|
|
 |
1f016a |
-log_watcher_bin = CTSvars.CRM_DAEMON_DIR + "/cts_log_watcher.py"
|
|
 |
1f016a |
+log_watcher_file = "cts_log_watcher.py"
|
|
 |
1f016a |
+log_watcher_bin = CTSvars.CRM_DAEMON_DIR + "/" + log_watcher_file
|
|
 |
1f016a |
log_watcher = """
|
|
 |
1f016a |
import sys, os, fcntl
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -164,14 +165,16 @@ class FileObj(SearchObj):
|
|
 |
1f016a |
global log_watcher
|
|
 |
1f016a |
global log_watcher_bin
|
|
 |
1f016a |
|
|
 |
1f016a |
- self.debug("Installing %s on %s" % (log_watcher_bin, host))
|
|
 |
1f016a |
+ self.debug("Installing %s on %s" % (log_watcher_file, host))
|
|
 |
1f016a |
|
|
 |
1f016a |
- os.system("cat << END >> %s\n%s\nEND" %(log_watcher_bin, log_watcher))
|
|
 |
1f016a |
- os.system("chmod 755 %s" %(log_watcher_bin))
|
|
 |
1f016a |
+ os.system("cat << END >> %s\n%s\nEND" %(log_watcher_file, log_watcher))
|
|
 |
1f016a |
+ os.system("chmod 755 %s" %(log_watcher_file))
|
|
 |
1f016a |
|
|
 |
1f016a |
- self.rsh.cp(log_watcher_bin, "root@%s:%s" % (host, log_watcher_bin))
|
|
 |
1f016a |
+ self.rsh.cp(log_watcher_file, "root@%s:%s" % (host, log_watcher_bin))
|
|
 |
1f016a |
has_log_watcher[host] = 1
|
|
 |
1f016a |
|
|
 |
1f016a |
+ os.system("rm -f %s" %(log_watcher_file))
|
|
 |
1f016a |
+
|
|
 |
1f016a |
self.harvest()
|
|
 |
1f016a |
|
|
 |
1f016a |
def async_complete(self, pid, returncode, outLines, errLines):
|
|
 |
1f016a |
diff --git a/doc/Makefile.am b/doc/Makefile.am
|
|
 |
1f016a |
index 8798365..9194f1d 100644
|
|
 |
1f016a |
--- a/doc/Makefile.am
|
|
 |
1f016a |
+++ b/doc/Makefile.am
|
|
 |
1f016a |
@@ -29,6 +29,7 @@ publican_docs =
|
|
 |
1f016a |
generated_docs =
|
|
 |
1f016a |
generated_mans =
|
|
 |
1f016a |
|
|
 |
1f016a |
+ASCIIDOC_CLI_TYPE := pcs
|
|
 |
1f016a |
DOCBOOK_FORMATS := html-desktop
|
|
 |
1f016a |
DOCBOOK_LANGS := en-US
|
|
 |
1f016a |
DOTs = $(wildcard */en-US/images/*.dot)
|
|
 |
1f016a |
@@ -154,17 +155,15 @@ pdf:
|
|
 |
1f016a |
make DOCBOOK_FORMATS="pdf" ASCIIDOC_CLI_TYPE=$(ASCIIDOC_CLI_TYPE) all-local
|
|
 |
1f016a |
|
|
 |
1f016a |
# Make sure www-(pcs|crmsh) happen in serial
|
|
 |
1f016a |
-www:
|
|
 |
1f016a |
- make www-pcs
|
|
 |
1f016a |
- make www-crmsh
|
|
 |
1f016a |
- make $(generated_docs) $(ascii)
|
|
 |
1f016a |
+www: clean-local $(generated_docs) $(ascii)
|
|
 |
1f016a |
+ make www-cli
|
|
 |
1f016a |
rsync -rtz --progress $(generated_docs) $(ascii) $(asciiman) root@www.clusterlabs.org:/var/www/html/doc/
|
|
 |
1f016a |
|
|
 |
1f016a |
www-crmsh:
|
|
 |
1f016a |
make ASCIIDOC_CLI_TYPE=crmsh clean-local www-cli
|
|
 |
1f016a |
|
|
 |
1f016a |
www-pcs:
|
|
 |
1f016a |
- make ASCIIDOC_CLI_TYPE=pcs clean-local www-cli
|
|
 |
1f016a |
+ make ASCIIDOC_CLI_TYPE=pcs www-cli
|
|
 |
1f016a |
|
|
 |
1f016a |
www-cli:
|
|
 |
1f016a |
for book in $(docbook); do \
|
|
 |
1f016a |
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
|
|
 |
1f016a |
index cf1478f..3a6ee0b 100644
|
|
 |
1f016a |
--- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt
|
|
 |
1f016a |
+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
|
|
 |
1f016a |
@@ -89,22 +89,22 @@ cluster cannot start resources or fence other nodes. See
|
|
 |
1f016a |
+no-quorum-policy+ below.
|
|
 |
1f016a |
|
|
 |
1f016a |
| dc-version |
|
|
 |
1f016a |
-indexterm:[dc-version,Cluster Peroperty]
|
|
 |
1f016a |
-indexterm:[Cluster,Peroperty,dc-version]
|
|
 |
1f016a |
+indexterm:[dc-version,Cluster Property]
|
|
 |
1f016a |
+indexterm:[Cluster,Property,dc-version]
|
|
 |
1f016a |
Version of Pacemaker on the cluster's DC.
|
|
 |
1f016a |
|
|
 |
1f016a |
Often includes the hash which identifies the exact Git changeset it
|
|
 |
1f016a |
was built from. Used for diagnostic purposes.
|
|
 |
1f016a |
|
|
 |
1f016a |
| cluster-infrastructure |
|
|
 |
1f016a |
-indexterm:[cluster-infrastructure,Cluster Peroperty]
|
|
 |
1f016a |
-indexterm:[Cluster,Peroperty,cluster-infrastructure]
|
|
 |
1f016a |
+indexterm:[cluster-infrastructure,Cluster Property]
|
|
 |
1f016a |
+indexterm:[Cluster,Property,cluster-infrastructure]
|
|
 |
1f016a |
The messaging stack on which Pacemaker is currently running.
|
|
 |
1f016a |
Used for informational and diagnostic purposes.
|
|
 |
1f016a |
|
|
 |
1f016a |
| expected-quorum-votes |
|
|
 |
1f016a |
-indexterm:[expected-quorum-votes,Cluster Peroperty]
|
|
 |
1f016a |
-indexterm:[Cluster,Peroperty,expected-quorum-votes]
|
|
 |
1f016a |
+indexterm:[expected-quorum-votes,Cluster Property]
|
|
 |
1f016a |
+indexterm:[Cluster,Property,expected-quorum-votes]
|
|
 |
1f016a |
The number of nodes expected to be in the cluster
|
|
 |
1f016a |
|
|
 |
1f016a |
Used to calculate quorum in Corosync 1.x (not CMAN) based clusters.
|
|
 |
1f016a |
diff --git a/extra/resources/ping b/extra/resources/ping
|
|
 |
1f016a |
index b9a69b8..e7b9973 100755
|
|
 |
1f016a |
--- a/extra/resources/ping
|
|
 |
1f016a |
+++ b/extra/resources/ping
|
|
 |
1f016a |
@@ -77,7 +77,7 @@ The name of the attributes to set. This is the name to be used in the constrain
|
|
 |
1f016a |
The number by which to multiply the number of connected ping nodes by
|
|
 |
1f016a |
</longdesc>
|
|
 |
1f016a |
<shortdesc lang="en">Value multiplier</shortdesc>
|
|
 |
1f016a |
-<content type="integer" default=""/>
|
|
 |
1f016a |
+<content type="integer" default="1"/>
|
|
 |
1f016a |
</parameter>
|
|
 |
1f016a |
|
|
 |
1f016a |
<parameter name="host_list" unique="0" required="1">
|
|
 |
1f016a |
@@ -93,7 +93,7 @@ The list of ping nodes to count.
|
|
 |
1f016a |
Number of ping attempts, per host, before declaring it dead
|
|
 |
1f016a |
</longdesc>
|
|
 |
1f016a |
<shortdesc lang="en">no. of ping attempts</shortdesc>
|
|
 |
1f016a |
-<content type="integer" default="2"/>
|
|
 |
1f016a |
+<content type="integer" default="3"/>
|
|
 |
1f016a |
</parameter>
|
|
 |
1f016a |
|
|
 |
1f016a |
<parameter name="timeout" unique="0">
|
|
 |
1f016a |
@@ -121,6 +121,15 @@ Default never fails.
|
|
 |
1f016a |
<content type="integer" default=""/>
|
|
 |
1f016a |
</parameter>
|
|
 |
1f016a |
|
|
 |
1f016a |
+<parameter name="use_fping" unique="0">
|
|
 |
1f016a |
+<longdesc lang="en">
|
|
 |
1f016a |
+Use fping rather than ping, if found. If set to 0, fping
|
|
 |
1f016a |
+will not be used even if present.
|
|
 |
1f016a |
+</longdesc>
|
|
 |
1f016a |
+<shortdesc lang="en">Use fping if available</shortdesc>
|
|
 |
1f016a |
+<content type="boolean" default="1"/>
|
|
 |
1f016a |
+</parameter>
|
|
 |
1f016a |
+
|
|
 |
1f016a |
<parameter name="debug" unique="0">
|
|
 |
1f016a |
<longdesc lang="en">
|
|
 |
1f016a |
Enables to use default attrd_updater verbose logging on every call.
|
|
 |
1f016a |
@@ -154,7 +163,7 @@ ping_conditional_log() {
|
|
 |
1f016a |
|
|
 |
1f016a |
ping_usage() {
|
|
 |
1f016a |
cat <
|
|
 |
1f016a |
-usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
|
|
 |
1f016a |
+usage: $0 {start|stop|monitor|validate-all|meta-data}
|
|
 |
1f016a |
|
|
 |
1f016a |
Expects to have a fully populated OCF RA-compliant environment set.
|
|
 |
1f016a |
END
|
|
 |
1f016a |
@@ -225,7 +234,7 @@ fping_check() {
|
|
 |
1f016a |
n=$OCF_RESKEY_attempts
|
|
 |
1f016a |
timeout=`expr $OCF_RESKEY_timeout \* 1000 / $OCF_RESKEY_attempts`
|
|
 |
1f016a |
|
|
 |
1f016a |
- cmd="fping -r $OCF_RESKEY_attempts -t $timeout -B 1.0 $OCF_RESKEY_host_list"
|
|
 |
1f016a |
+ cmd="fping -r $OCF_RESKEY_attempts -t $timeout -B 1.0 $OCF_RESKEY_options $OCF_RESKEY_host_list"
|
|
 |
1f016a |
$cmd>$f_out 2>$f_err; rc=$?
|
|
 |
1f016a |
active=`grep alive $f_out|wc -l`
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -274,7 +283,7 @@ ping_check() {
|
|
 |
1f016a |
|
|
 |
1f016a |
ping_update() {
|
|
 |
1f016a |
|
|
 |
1f016a |
- if have_binary fping; then
|
|
 |
1f016a |
+ if ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping; then
|
|
 |
1f016a |
fping_check
|
|
 |
1f016a |
active=$?
|
|
 |
1f016a |
else
|
|
 |
1f016a |
@@ -306,6 +315,7 @@ ping_update() {
|
|
 |
1f016a |
: ${OCF_RESKEY_multiplier:="1"}
|
|
 |
1f016a |
: ${OCF_RESKEY_debug:="false"}
|
|
 |
1f016a |
: ${OCF_RESKEY_failure_score:="0"}
|
|
 |
1f016a |
+: ${OCF_RESKEY_use_fping:="1"}
|
|
 |
1f016a |
|
|
 |
1f016a |
: ${OCF_RESKEY_CRM_meta_timeout:="20000"}
|
|
 |
1f016a |
: ${OCF_RESKEY_CRM_meta_globally_unique:="true"}
|
|
 |
1f016a |
diff --git a/extra/resources/remote b/extra/resources/remote
|
|
 |
1f016a |
index 9f141a2..d79c4c3 100644
|
|
 |
1f016a |
--- a/extra/resources/remote
|
|
 |
1f016a |
+++ b/extra/resources/remote
|
|
 |
1f016a |
@@ -58,12 +58,12 @@ meta_data() {
|
|
 |
1f016a |
tcp port to connect to.
|
|
 |
1f016a |
</longdesc>
|
|
 |
1f016a |
<shortdesc lang="en">tcp port</shortdesc>
|
|
 |
1f016a |
- <content type="string" default="1984"/>
|
|
 |
1f016a |
+ <content type="string" default="3121"/>
|
|
 |
1f016a |
</parameter>
|
|
 |
1f016a |
</parameters>
|
|
 |
1f016a |
<actions>
|
|
 |
1f016a |
- <action name="start" timeout="40" />
|
|
 |
1f016a |
- <action name="stop" timeout="40" />
|
|
 |
1f016a |
+ <action name="start" timeout="60" />
|
|
 |
1f016a |
+ <action name="stop" timeout="60" />
|
|
 |
1f016a |
<action name="monitor" timeout="30" />
|
|
 |
1f016a |
<action name="migrate_to" timeout="60" />
|
|
 |
1f016a |
<action name="migrate_from" timeout="60" />
|
|
 |
1f016a |
diff --git a/fencing/main.c b/fencing/main.c
|
|
 |
1f016a |
index b03659e..fe6560d 100644
|
|
 |
1f016a |
--- a/fencing/main.c
|
|
 |
1f016a |
+++ b/fencing/main.c
|
|
 |
1f016a |
@@ -1002,15 +1002,22 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
|
|
 |
1f016a |
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
- if(daemon_option("watchdog")) {
|
|
 |
1f016a |
- stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
|
|
 |
1f016a |
- }
|
|
 |
1f016a |
+ if(daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
+ const char *value = getenv("SBD_WATCHDOG_TIMEOUT");
|
|
 |
1f016a |
|
|
 |
1f016a |
- if (stonith_watchdog_xml) {
|
|
 |
1f016a |
- const char *value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
|
|
 |
1f016a |
- stonith_watchdog_timeout_ms = crm_get_msec(value);
|
|
 |
1f016a |
- } else {
|
|
 |
1f016a |
- stonith_watchdog_timeout_ms = 0;
|
|
 |
1f016a |
+ if(value == NULL) {
|
|
 |
1f016a |
+ stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
|
|
 |
1f016a |
+ if (stonith_watchdog_xml) {
|
|
 |
1f016a |
+ value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ if(value) {
|
|
 |
1f016a |
+ stonith_watchdog_timeout_ms = crm_get_msec(value);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ } else {
|
|
 |
1f016a |
+ stonith_watchdog_timeout_ms = 0;
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
|
|
 |
1f016a |
@@ -1354,11 +1361,12 @@ main(int argc, char **argv)
|
|
 |
1f016a |
|
|
 |
1f016a |
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
|
|
 |
1f016a |
|
|
 |
1f016a |
- if(daemon_option("watchdog")) {
|
|
 |
1f016a |
+ if(daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
xmlNode *xml;
|
|
 |
1f016a |
stonith_key_value_t *params = NULL;
|
|
 |
1f016a |
|
|
 |
1f016a |
- stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);
|
|
 |
1f016a |
+ params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL);
|
|
 |
1f016a |
stonith_device_register(xml, NULL, FALSE);
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/fencing/remote.c b/fencing/remote.c
|
|
 |
1f016a |
index 3f4f5ca..63c0274 100644
|
|
 |
1f016a |
--- a/fencing/remote.c
|
|
 |
1f016a |
+++ b/fencing/remote.c
|
|
 |
1f016a |
@@ -277,11 +277,9 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
}

if (!op->delegate && data) {
- xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_WARNING);
+ xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
if(ndata) {
op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
- } else {
- op->delegate = crm_element_value_copy(data, F_ORIG);
}
}

@@ -1055,9 +1053,10 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)

} else {
timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout);
- crm_info("Requesting that %s perform op %s %s for %s (%ds)",
- peer->host, op->action, op->target, op->client_name, timeout_one);
+ crm_info("Requesting that %s perform op %s %s for %s (%ds, %ds)",
+ peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms);
crm_xml_add(remote_op, F_STONITH_MODE, "smart");
+
}

op->state = st_exec;
@@ -1065,7 +1064,17 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
g_source_remove(op->op_timer_one);
}

- if(device && stonith_watchdog_timeout_ms && safe_str_eq(device, "watchdog")) {
+ if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
+ crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)",
+ stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device);
+ op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
+
+ /* TODO: We should probably look into peer->device_list to verify watchdog is going to be in use */
+ } else if(stonith_watchdog_timeout_ms > 0
+ && safe_str_eq(peer->host, op->target)
+ && safe_str_neq(op->action, "on")) {
+ crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)",
+ stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device);
op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);

} else {
@@ -1094,6 +1103,14 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
/* if the operation never left the query state,
* but we have all the expected replies, then no devices
* are available to execute the fencing operation. */
+ if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
+ crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)",
+ stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device);
+
+ op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
+ return;
+ }
+
if (op->state == st_query) {
crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)",
op->replies, op->target, op->client_name, op->state);
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index 1ff425c..d3c2643 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -86,6 +86,7 @@

# define XML_ATTR_QUORUM_PANIC "no-quorum-panic"
# define XML_ATTR_HAVE_QUORUM "have-quorum"
+# define XML_ATTR_HAVE_WATCHDOG "have-watchdog"
# define XML_ATTR_EXPECTED_VOTES "expected-quorum-votes"
# define XML_ATTR_GENERATION "epoch"
# define XML_ATTR_GENERATION_ADMIN "admin_epoch"
@@ -298,6 +299,8 @@
# define XML_CONS_TAG_RSC_SET "resource_set"
# define XML_CONS_ATTR_SYMMETRICAL "symmetrical"

+# define XML_LOCATION_ATTR_DISCOVERY "resource-discovery"
+
# define XML_COLOC_ATTR_SOURCE "rsc"
# define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role"
# define XML_COLOC_ATTR_TARGET "with-rsc"
@@ -321,6 +324,7 @@
# define XML_NVPAIR_ATTR_VALUE "value"

# define XML_NODE_ATTR_STATE "state"
+# define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled"

# define XML_CONFIG_ATTR_DC_DEADTIME "dc-deadtime"
# define XML_CONFIG_ATTR_ELECTION_FAIL "election-timeout"
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
index d0ba856..f08a910 100644
--- a/include/crm/pengine/status.h
+++ b/include/crm/pengine/status.h
@@ -136,6 +136,8 @@ struct node_shared_s {
gboolean shutdown;
gboolean expected_up;
gboolean is_dc;
+ gboolean rsc_discovery_enabled;
+
int num_resources;
GListPtr running_rsc; /* resource_t* */
GListPtr allocated_rsc; /* resource_t* */
@@ -156,6 +158,7 @@ struct node_shared_s {
struct node_s {
int weight;
gboolean fixed;
+ int rsc_discover_mode;
int count;
struct node_shared_s *details;
};
@@ -252,6 +255,7 @@ struct resource_s {
int migration_threshold;

gboolean is_remote_node;
+ gboolean exclusive_discover;

unsigned long long flags;

diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
index 022a8ab..15b354b 100644
--- a/lib/cib/cib_utils.c
+++ b/lib/cib/cib_utils.c
@@ -665,7 +665,7 @@ cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc)
crm_trace("Invoking global callback for call %d", call_id);
cib->op_callback(msg, call_id, rc, output);
}
- crm_trace("OP callback activated.");
+ crm_trace("OP callback activated for %d", call_id);
}

void
diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
index ac395ec..f2295ff 100644
--- a/lib/common/mainloop.c
+++ b/lib/common/mainloop.c
@@ -799,7 +799,7 @@ mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
{
mainloop_io_t *client = NULL;

- if (fd > 0) {
+ if (fd >= 0) {
client = calloc(1, sizeof(mainloop_io_t));
client->name = strdup(name);
client->userdata = userdata;
diff --git a/lib/common/utils.c b/lib/common/utils.c
index dc54e6d..6b8b12c 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -2008,6 +2008,7 @@ attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const cha
case 'u':
crm_xml_add(update, F_ATTRD_TASK, "update");
crm_xml_add(update, F_ATTRD_REGEX, name);
+ break;
case 'D':
case 'U':
case 'v':
diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
index 7d70f91..3c616cd 100644
--- a/lib/common/watchdog.c
+++ b/lib/common/watchdog.c
@@ -211,10 +211,10 @@ pcmk_panic_local(void)

if(ppid > 1) {
/* child daemon */
- crm_exit(pcmk_err_panic);
+ exit(pcmk_err_panic);
} else {
/* pacemakerd or orphan child */
- crm_exit(DAEMON_RESPAWN_STOP);
+ exit(DAEMON_RESPAWN_STOP);
}
}

@@ -222,6 +222,7 @@ static void
pcmk_panic_sbd(void)
{
union sigval signal_value;
+ pid_t ppid = getppid();

do_crm_log_always(LOG_EMERG, "Signaling sbd(%d) to panic", sbd_pid);

@@ -232,7 +233,13 @@ pcmk_panic_sbd(void)
pcmk_panic_local();
}

- crm_exit(DAEMON_RESPAWN_STOP);
+ if(ppid > 1) {
+ /* child daemon */
+ exit(pcmk_err_panic);
+ } else {
+ /* pacemakerd or orphan child */
+ exit(DAEMON_RESPAWN_STOP);
+ }
}

void
@@ -275,17 +282,27 @@ pcmk_locate_sbd(void)
}

/* Look for the pid file */
+ pidfile = g_strdup_printf("%s/sbd.pid", HA_STATE_DIR);

/* Read the pid file */
if(pidfile) {
int rc = crm_pidfile_inuse(pidfile, 1);
if(rc < pcmk_ok && rc != -ENOENT) {
sbd_pid = crm_read_pidfile(pidfile);
+ crm_trace("SBD detected at pid=%d (file)");
}
}

- /* Fall back to /proc for systems that support it */
- sbd_pid = pcmk_locate_proc_entry("sbd");
+ if(sbd_pid < 0) {
+ /* Fall back to /proc for systems that support it */
+ sbd_pid = pcmk_locate_proc_entry("sbd");
+ crm_trace("SBD detected at pid=%d (proc)");
+ }
+
+ if(sbd_pid < 0) {
+ sbd_pid = 0;
+ }

+ free(pidfile);
return sbd_pid;
}
diff --git a/lib/common/xml.c b/lib/common/xml.c
index 06de44c..58d0a00 100644
--- a/lib/common/xml.c
+++ b/lib/common/xml.c
@@ -386,6 +386,7 @@ static int __xml_build_schema_list(void)
xslt = get_schema_path(NULL, transform);
if(stat(xslt, &s) != 0) {
crm_err("Transform %s not found", xslt);
+ free(xslt);
__xml_schema_add(2, version, NULL, NULL, NULL, -1);
break;
} else {
@@ -1950,9 +1951,11 @@ bool xml_patch_versions(xmlNode *patchset, int add[3], int del[3])
return -EINVAL;
}

- for(lpc = 0; lpc < DIMOF(vfields); lpc++) {
- crm_element_value_int(tmp, vfields[lpc], &(del[lpc]));
- crm_trace("Got %d for del[%s]", del[lpc], vfields[lpc]);
+ if (tmp) {
+ for(lpc = 0; lpc < DIMOF(vfields); lpc++) {
+ crm_element_value_int(tmp, vfields[lpc], &(del[lpc]));
+ crm_trace("Got %d for del[%s]", del[lpc], vfields[lpc]);
+ }
}

switch(format) {
@@ -1973,9 +1976,11 @@ bool xml_patch_versions(xmlNode *patchset, int add[3], int del[3])
return -EINVAL;
}

- for(lpc = 0; lpc < DIMOF(vfields); lpc++) {
- crm_element_value_int(tmp, vfields[lpc], &(add[lpc]));
- crm_trace("Got %d for add[%s]", add[lpc], vfields[lpc]);
+ if (tmp) {
+ for(lpc = 0; lpc < DIMOF(vfields); lpc++) {
+ crm_element_value_int(tmp, vfields[lpc], &(add[lpc]));
+ crm_trace("Got %d for add[%s]", add[lpc], vfields[lpc]);
+ }
}

return pcmk_ok;
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index e98e1d3..e5e4ea7 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -106,6 +106,8 @@ pe_cluster_option pe_opts[] = {
"Action to send to STONITH device", NULL },
{ "stonith-timeout", NULL, "time", NULL, "60s", &check_timer,
"How long to wait for the STONITH action to complete", NULL },
+ { XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, "false", &check_boolean,
+ "Enable watchdog integration", "Set automatically by the cluster if SBD is detected. User configured values are ignored." },
{ "startup-fencing", "startup_fencing", "boolean", NULL, "true", &check_boolean,
"STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" },

diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 7127c12..a19bdff 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -140,6 +140,12 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set)
crm_info("Startup probes: disabled (dangerous)");
}

+ value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
+ if (value && crm_is_true(value)) {
+ crm_notice("Relying on watchdog integration for fencing");
+ set_bit(data_set->flags, pe_flag_have_stonith_resource);
+ }
+
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = crm_get_msec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
@@ -294,6 +300,7 @@ create_node(const char *id, const char *uname, const char *type, const char *sco
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
+ new_node->details->rsc_discovery_enabled = TRUE;
new_node->details->running_rsc = NULL;
new_node->details->type = node_ping;

@@ -308,6 +315,13 @@ create_node(const char *id, const char *uname, const char *type, const char *sco
new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str,
g_hash_destroy_str);
+
+ if (is_remote_node(new_node)) {
+ g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("remote"));
+ } else {
+ g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("cluster"));
+ }
+
new_node->details->utilization =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
g_hash_destroy_str);
@@ -657,7 +671,10 @@ link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
* as cluster nodes. */
if (new_rsc->container == NULL) {
handle_startup_fencing(data_set, remote_node);
- return;
+ } else {
+ /* At this point we know if the remote node is a container or baremetal
+ * remote node, update the #kind attribute if a container is involved */
+ g_hash_table_replace(remote_node->details->attrs, strdup("#kind"), strdup("container"));
}
}

@@ -723,10 +740,12 @@ unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
}

data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
+ if (is_set(data_set->flags, pe_flag_quick_location)) {
+ /* Ignore */
+
+ } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
+ && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {

- if (is_not_set(data_set->flags, pe_flag_quick_location)
- && is_set(data_set->flags, pe_flag_stonith_enabled)
- && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
@@ -988,6 +1007,7 @@ unpack_status(xmlNode * status, pe_working_set_t * data_set)

if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
xmlNode *attrs = NULL;
+ const char *resource_discovery_enabled = NULL;

id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
@@ -1027,6 +1047,12 @@ unpack_status(xmlNode * status, pe_working_set_t * data_set)
this_node->details->maintenance = TRUE;
}

+ resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY);
+ if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
+ crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
+ XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
+ }
+
crm_trace("determining node state");
determine_online_status(state, this_node, data_set);

@@ -1102,6 +1128,7 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)

/* process attributes */
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
+ const char *resource_discovery_enabled = NULL;
xmlNode *attrs = NULL;
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
@@ -1125,6 +1152,26 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
+
+ if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
+ crm_info("Node %s is in maintenance-mode", this_node->details->uname);
+ this_node->details->maintenance = TRUE;
+ }
+
+ resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY);
+ if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
+ if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
+ crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
+ XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
+ } else {
+ /* if we're here, this is either a baremetal node and fencing is enabled,
+ * or this is a container node which we don't care if fencing is enabled
+ * or not on. container nodes are 'fenced' by recovering the container resource
+ * regardless of whether fencing is enabled. */
+ crm_info("Node %s has resource discovery disabled", this_node->details->uname);
+ this_node->details->rsc_discovery_enabled = FALSE;
+ }
+ }
}

/* process node rsc status */
@@ -1858,7 +1905,7 @@ process_rsc_state(resource_t * rsc, node_t * node,
* reconnect to the remote-node in this transition or not. */
if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
node_t *tmpnode = pe_find_node(data_set->nodes, rsc->id);
- if (tmpnode->details->unclean) {
+ if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
@@ -3050,8 +3097,7 @@ add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_

g_hash_table_insert(node->details->attrs,
strdup("#uname"), strdup(node->details->uname));
- g_hash_table_insert(node->details->attrs,
- strdup("#kind"), strdup(node->details->remote_rsc?"container":"cluster"));
+
g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id));
if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
data_set->dc_node = node;
diff --git a/lib/services/dbus.c b/lib/services/dbus.c
index 587589c..f4632f2 100644
--- a/lib/services/dbus.c
+++ b/lib/services/dbus.c
@@ -355,6 +355,11 @@ pcmk_dbus_get_property(
DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL);

output = pcmk_dbus_lookup_result(reply, query_data);
+ free(query_data->target);
+ free(query_data->object);
+ free(query_data->name);
+ free(query_data);
+
if(reply) {
dbus_message_unref(reply);
}
diff --git a/lib/services/services.c b/lib/services/services.c
index 8590b56..753e257 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -313,6 +313,7 @@ services_action_free(svc_action_t * op)

if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
+ op->opaque->repeat_timer = 0;
}
if (op->opaque->stderr_gsource) {
mainloop_del_fd(op->opaque->stderr_gsource);
@@ -425,6 +426,7 @@ services_action_kick(const char *name, const char *action, int interval /* ms */
} else {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
+ op->opaque->repeat_timer = 0;
}
recurring_action_timer(op);
return TRUE;
@@ -459,6 +461,7 @@ handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_actio
if (dup->pid != 0) {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
+ op->opaque->repeat_timer = 0;
}
recurring_action_timer(dup);
}
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index 961ff18..2279e4e 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -227,6 +227,7 @@ recurring_action_timer(gpointer data)
op->stdout_data = NULL;
free(op->stderr_data);
op->stderr_data = NULL;
+ op->opaque->repeat_timer = 0;

services_action_async(op, NULL);
return FALSE;
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index c967430..9a7b078 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -303,10 +303,14 @@ systemd_unit_listall(void)
gboolean
systemd_unit_exists(const char *name)
{
+ char *unit = NULL;
+
/* Note: Makes a blocking dbus calls
* Used by resources_find_service_class() when resource class=service
*/
- if(systemd_unit_by_name(name, NULL)) {
+ unit = systemd_unit_by_name(name, NULL);
+ if(unit) {
+ free(unit);
return TRUE;
}
return FALSE;
@@ -542,9 +546,15 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)

reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error);
systemd_exec_result(reply, op);
+
if(reply) {
dbus_message_unref(reply);
}
+ if(msg) {
+ dbus_message_unref(msg);
+ }
+
+ return FALSE;
}

if(msg) {
@@ -563,6 +573,8 @@ systemd_unit_exec_with_unit(svc_action_t * op, const char *unit)
gboolean
systemd_unit_exec(svc_action_t * op)
{
+ char *unit = NULL;
+
CRM_ASSERT(op);
CRM_ASSERT(systemd_init());
op->rc = PCMK_OCF_UNKNOWN_ERROR;
@@ -580,7 +592,9 @@ systemd_unit_exec(svc_action_t * op)
return TRUE;
}

- systemd_unit_by_name(op->agent, op);
+ unit = systemd_unit_by_name(op->agent, op);
+ free(unit);
+
if (op->synchronous == FALSE) {
return TRUE;
}
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index 7075b9f..d3ede18 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -837,7 +837,9 @@ action_complete(svc_action_t * action)
}

cmd_reset(cmd);
- rsc->active = NULL;
+ if(rsc) {
+ rsc->active = NULL;
+ }
schedule_lrmd_cmd(rsc, cmd);
return;

diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index ba3c88f..c7852c3 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -1038,6 +1038,10 @@ main(int argc, char **argv)
crm_exit(ENOPROTOOPT);
}

+ if(pcmk_locate_sbd() > 0) {
+ setenv("PCMK_watchdog", "true", 1);
+ }
+
find_and_track_existing_processes();

cluster.destroy = mcp_cpg_destroy;
diff --git a/mcp/pacemaker.service.in b/mcp/pacemaker.service.in
index b9f3336..7871167 100644
--- a/mcp/pacemaker.service.in
+++ b/mcp/pacemaker.service.in
@@ -18,6 +18,7 @@ KillMode=process
NotifyAccess=main
SysVStartPriority=99
EnvironmentFile=-@sysconfdir@/sysconfig/pacemaker
+EnvironmentFile=-@sysconfdir@/sysconfig/sbd
SuccessExitStatus=100

ExecStart=@sbindir@/pacemakerd -f
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 8d02d9b..e708e26 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -755,7 +755,7 @@ apply_system_health(pe_working_set_t * data_set)
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;

- rsc2node_new(health_strategy, rsc, system_health, node, data_set);
+ rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
}
}

@@ -861,6 +861,10 @@ probe_resources(pe_working_set_t * data_set)
/* TODO enable container node probes once ordered probing is implemented. */
continue;

+ } else if (node->details->rsc_discovery_enabled == FALSE) {
+ /* resource discovery is disabled for this node */
+ continue;
+
} else if (probe_complete == NULL) {
probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set);
if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
diff --git a/pengine/constraints.c b/pengine/constraints.c
index 1aa848e..88e382b 100644
--- a/pengine/constraints.c
+++ b/pengine/constraints.c
@@ -658,6 +658,7 @@ unpack_rsc_location(xmlNode * xml_obj, resource_t * rsc_lh, const char * role,
const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE);
const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE);
+ const char *discovery = crm_element_value(xml_obj, XML_LOCATION_ATTR_DISCOVERY);

if (rsc_lh == NULL) {
/* only a warn as BSC adds the constraint then the resource */
@@ -676,7 +677,7 @@ unpack_rsc_location(xmlNode * xml_obj, resource_t * rsc_lh, const char * role,
if (!match) {
return FALSE;
}
- location = rsc2node_new(id, rsc_lh, score_i, match, data_set);
+ location = rsc2node_new(id, rsc_lh, score_i, discovery, match, data_set);

} else {
xmlNode *rule_xml = NULL;
@@ -720,6 +721,7 @@ unpack_rsc_location(xmlNode * xml_obj, resource_t * rsc_lh, const char * role,
}
}
}
+
return TRUE;
}

@@ -958,7 +960,7 @@ generate_location_rule(resource_t * rsc, xmlNode * rule_xml, pe_working_set_t *
do_and = FALSE;
}

- location_rule = rsc2node_new(rule_id, rsc, 0, NULL, data_set);
+ location_rule = rsc2node_new(rule_id, rsc, 0, NULL, NULL, data_set);

if (location_rule == NULL) {
return NULL;
diff --git a/pengine/native.c b/pengine/native.c
index 5db8c60..0020344 100644
--- a/pengine/native.c
+++ b/pengine/native.c
@@ -2097,9 +2097,14 @@ native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint)
other_node->weight = merge_weights(other_node->weight, node->weight);

} else {
- node_t *new_node = node_copy(node);
+ other_node = node_copy(node);

- g_hash_table_insert(rsc->allowed_nodes, (gpointer) new_node->details->id, new_node);
+ g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node);
+ }
+
+ if (other_node->rsc_discover_mode < constraint->discover_mode) {
+ /* exclusive > never > always... always is default */
+ other_node->rsc_discover_mode = constraint->discover_mode;
}
}

@@ -2704,6 +2709,7 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete,
char *key = NULL;
action_t *probe = NULL;
node_t *running = NULL;
+ node_t *allowed = NULL;
resource_t *top = uber_parent(rsc);

static const char *rc_master = NULL;
@@ -2780,6 +2786,23 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete,
return FALSE;
}

+ allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
+ if (rsc->exclusive_discover || top->exclusive_discover) {
+ if (allowed == NULL) {
+ /* exclusive discover is enabled and this node is not in the allowed list. */
+ return FALSE;
+ } else if (allowed->rsc_discover_mode != discover_exclusive) {
+ /* exclusive discover is enabled and this node is not marked
+ * as a node this resource should be discovered on */
+ return FALSE;
+ }
+ }
+ if (allowed && allowed->rsc_discover_mode == discover_never) {
+ /* this resource is marked as not needing to be discovered on this node */
+ return FALSE;
+ }
+
+
key = generate_op_key(rsc->id, RSC_STATUS, 0);
probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set);
update_action_flags(probe, pe_action_optional | pe_action_clear);
diff --git a/pengine/pengine.h b/pengine/pengine.h
index 653fadf..87fa150 100644
--- a/pengine/pengine.h
+++ b/pengine/pengine.h
@@ -75,11 +75,18 @@ struct rsc_ticket_s {
int role_lh;
};

+enum rsc_discover_e {
+ discover_always = 0,
+ discover_never,
+ discover_exclusive,
+};
+
struct rsc_to_node_s {
const char *id;
resource_t *rsc_lh;

enum rsc_role_e role_filter;
+ enum rsc_discover_e discover_mode;
GListPtr node_list_rh; /* node_t* */
};

diff --git a/pengine/regression.sh b/pengine/regression.sh
index bdc7d3a..a9a5605 100755
--- a/pengine/regression.sh
+++ b/pengine/regression.sh
@@ -768,5 +768,10 @@ do_test remote-disable "Disable a baremetal remote-node"
do_test remote-orphaned "Properly shutdown orphaned connection resource"
do_test remote-recover "Recover connection resource after cluster-node fails."
do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
+
+echo ""
+do_test resource-discovery "Exercises resource-discovery location constraint option."
+do_test rsc-discovery-per-node "Disable resource discovery per node"
+
echo ""
test_results
diff --git a/pengine/test10/remote-disable.dot b/pengine/test10/remote-disable.dot
index 2e21fef..fbcea60 100644
--- a/pengine/test10/remote-disable.dot
+++ b/pengine/test10/remote-disable.dot
@@ -1,8 +1,4 @@
digraph "g" {
-"FAKE2_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"]
-"FAKE2_start_0 18builder" -> "FAKE2_monitor_60000 18builder" [ style = bold]
-"FAKE2_start_0 18builder" [ style=bold color="green" fontcolor="black"]
-"FAKE2_stop_0 remote1" -> "FAKE2_start_0 18builder" [ style = bold]
"FAKE2_stop_0 remote1" -> "all_stopped" [ style = bold]
"FAKE2_stop_0 remote1" -> "remote1_stop_0 18builder" [ style = bold]
"FAKE2_stop_0 remote1" [ style=bold color="green" fontcolor="black"]
diff --git a/pengine/test10/remote-disable.exp b/pengine/test10/remote-disable.exp
index 3273bb2..99c1c55 100644
--- a/pengine/test10/remote-disable.exp
+++ b/pengine/test10/remote-disable.exp
@@ -14,32 +14,6 @@
</synapse>
<synapse id="1">
<action_set>
- <rsc_op id="20" operation="monitor" operation_key="FAKE2_monitor_60000" on_node="18builder" on_node_uuid="5">
- <primitive id="FAKE2" class="ocf" provider="heartbeat" type="Dummy"/>
- <attributes CRM_meta_interval="60000" CRM_meta_name="monitor" CRM_meta_timeout="20000" />
- </rsc_op>
- </action_set>
- <inputs>
- <trigger>
- <rsc_op id="19" operation="start" operation_key="FAKE2_start_0" on_node="18builder" on_node_uuid="5"/>
- </trigger>
- </inputs>
- </synapse>
- <synapse id="2">
- <action_set>
- <rsc_op id="19" operation="start" operation_key="FAKE2_start_0" on_node="18builder" on_node_uuid="5">
- <primitive id="FAKE2" class="ocf" provider="heartbeat" type="Dummy"/>
- <attributes CRM_meta_timeout="20000" />
- </rsc_op>
- </action_set>
- <inputs>
- <trigger>
- <rsc_op id="18" operation="stop" operation_key="FAKE2_stop_0" on_node="remote1" on_node_uuid="remote1" router_node="18builder"/>
- </trigger>
- </inputs>
- </synapse>
- <synapse id="3">
- <action_set>
<rsc_op id="18" operation="stop" operation_key="FAKE2_stop_0" on_node="remote1" on_node_uuid="remote1" router_node="18builder">
<primitive id="FAKE2" class="ocf" provider="heartbeat" type="Dummy"/>
<attributes CRM_meta_timeout="20000" />
@@ -47,7 +21,7 @@
</action_set>
<inputs/>
</synapse>
- <synapse id="4">
+ <synapse id="2">
<action_set>
<pseudo_event id="7" operation="all_stopped" operation_key="all_stopped">
<attributes />
diff --git a/pengine/test10/remote-disable.scores b/pengine/test10/remote-disable.scores
index 4efd7d8..d66861f 100644
--- a/pengine/test10/remote-disable.scores
+++ b/pengine/test10/remote-disable.scores
@@ -3,9 +3,9 @@ native_color: FAKE1 allocation score on 18builder: 0
native_color: FAKE1 allocation score on 18node1: 0
native_color: FAKE1 allocation score on 18node2: 0
native_color: FAKE1 allocation score on remote1: 0
-native_color: FAKE2 allocation score on 18builder: 0
-native_color: FAKE2 allocation score on 18node1: 0
-native_color: FAKE2 allocation score on 18node2: 0
+native_color: FAKE2 allocation score on 18builder: -INFINITY
+native_color: FAKE2 allocation score on 18node1: -INFINITY
+native_color: FAKE2 allocation score on 18node2: -INFINITY
native_color: FAKE2 allocation score on remote1: 0
native_color: FAKE3 allocation score on 18builder: 0
native_color: FAKE3 allocation score on 18node1: 0
diff --git a/pengine/test10/remote-disable.summary b/pengine/test10/remote-disable.summary
index 57b06fe..0627647 100644
--- a/pengine/test10/remote-disable.summary
+++ b/pengine/test10/remote-disable.summary
@@ -12,14 +12,12 @@ RemoteOnline: [ remote1 ]

Transition Summary:
* Stop remote1 (18builder)
- * Move FAKE2 (Started remote1 -> 18builder)
+ * Stop FAKE2 (remote1)

Executing cluster transition:
* Resource action: FAKE2 stop on remote1
* Resource action: remote1 stop on 18builder
- * Resource action: FAKE2 start on 18builder
* Pseudo action: all_stopped
- * Resource action: FAKE2 monitor=60000 on 18builder

Revised cluster status:
Online: [ 18builder 18node1 18node2 ]
@@ -28,7 +26,7 @@ RemoteOFFLINE: [ remote1 ]
shooter (stonith:fence_xvm): Started 18node1
remote1 (ocf::pacemaker:remote): Stopped
FAKE1 (ocf::heartbeat:Dummy): Started 18node2
- FAKE2 (ocf::heartbeat:Dummy): Started 18builder
+ FAKE2 (ocf::heartbeat:Dummy): Stopped
FAKE3 (ocf::heartbeat:Dummy): Started 18builder
FAKE4 (ocf::heartbeat:Dummy): Started 18node1

diff --git a/pengine/test10/remote-disable.xml b/pengine/test10/remote-disable.xml
index eae0055..1637801 100644
--- a/pengine/test10/remote-disable.xml
+++ b/pengine/test10/remote-disable.xml
@@ -52,7 +52,13 @@
</operations>
</primitive>
</resources>
- <constraints/>
+ <constraints>
+ <rsc_location id="FAKE2-location" rsc="FAKE2">
+ <rule id="FAKE2-on-remote" score="-INFINITY">
+ <expression id="FAKE2-on-remote-exp" attribute="#kind" operation="ne" value="remote"/>
+ </rule>
+ </rsc_location>
+ </constraints>
</configuration>
<status>
<node_state id="5" uname="18builder" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
diff --git a/pengine/test10/resource-discovery.dot b/pengine/test10/resource-discovery.dot
new file mode 100644
index 0000000..efb2434
--- /dev/null
+++ b/pengine/test10/resource-discovery.dot
@@ -0,0 +1,185 @@
+ digraph "g" {
+"FAKE1_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold]
+"FAKE1_monitor_0 18builder" [ style=bold color="green" fontcolor="black"]
+"FAKE1_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold]
+"FAKE1_monitor_0 18node1" [ style=bold color="green" fontcolor="black"]
+"FAKE1_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold]
+"FAKE1_monitor_0 18node2" [ style=bold color="green" fontcolor="black"]
+"FAKE1_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold]
+"FAKE1_monitor_0 18node3" [ style=bold color="green" fontcolor="black"]
+"FAKE1_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold]
+"FAKE1_monitor_0 18node4" [ style=bold color="green" fontcolor="black"]
+"FAKE1_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"]
+"FAKE1_start_0 18node2" -> "FAKE1_monitor_60000 18node2" [ style = bold]
+"FAKE1_start_0 18node2" [ style=bold color="green" fontcolor="black"]
+"FAKE2_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"]
+"FAKE2_start_0 18node3" -> "FAKE2_monitor_60000 18node3" [ style = bold]
+"FAKE2_start_0 18node3" [ style=bold color="green" fontcolor="black"]
+"FAKE2_stop_0 18node2" -> "FAKE2_start_0 18node3" [ style = bold]
+"FAKE2_stop_0 18node2" -> "all_stopped" [ style = bold]
+"FAKE2_stop_0 18node2" [ style=bold color="green" fontcolor="black"]
+"FAKE3_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold]
+"FAKE3_monitor_0 18node3" [ style=bold color="green" fontcolor="black"]
+"FAKE3_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"]
+"FAKE3_start_0 18node3" -> "FAKE3_monitor_60000 18node3" [ style = bold]
+"FAKE3_start_0 18node3" [ style=bold color="green" fontcolor="black"]
+"FAKE3_stop_0 18builder" -> "FAKE3_start_0 18node3" [ style = bold]
+"FAKE3_stop_0 18builder" -> "all_stopped" [ style = bold]
+"FAKE3_stop_0 18builder" [ style=bold color="green" fontcolor="black"]
+"FAKE4_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold]
+"FAKE4_monitor_0 18node4" [ style=bold color="green" fontcolor="black"]
+"FAKE4_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"]
+"FAKE4_start_0 18node4" -> "FAKE4_monitor_60000 18node4" [ style = bold]
+"FAKE4_start_0 18node4" [ style=bold color="green" fontcolor="black"]
+"FAKE4_stop_0 18node1" -> "FAKE4_start_0 18node4" [ style = bold]
+"FAKE4_stop_0 18node1" -> "all_stopped" [ style = bold]
+"FAKE4_stop_0 18node1" [ style=bold color="green" fontcolor="black"]
+"FAKE5_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold]
+"FAKE5_monitor_0 18builder" [ style=bold color="green" fontcolor="black"]
+"FAKE5_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold]
+"FAKE5_monitor_0 18node1" [ style=bold color="green" fontcolor="black"]