commit 5fbd9decbca3f40a8b29e9d1ef5d158808f71cf6
Author: Andrew Beekhof <andrew@beekhof.net>
Date: Wed Apr 9 14:40:30 2014 +1000
Fix: fencing: Filter self-fencing at the peers to allow unfencing to work correctly
(cherry picked from commit e1e7d7fad2b5ade3c6e433cb1b631d071b8d5e70)
Conflicts:
fencing/commands.c
fencing/remote.c
diff --git a/fencing/commands.c b/fencing/commands.c
index 14933a5..abbb1ad 100644
--- a/fencing/commands.c
+++ b/fencing/commands.c
@@ -60,6 +60,7 @@ struct device_search_s {
int per_device_timeout;
int replies_needed;
int replies_received;
+ bool allow_suicide;
void *user_data;
void (*callback) (GList * devices, void *user_data);
@@ -1072,8 +1073,17 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
if (dev->on_target_actions &&
search->action &&
- strstr(dev->on_target_actions, search->action) && safe_str_neq(host, stonith_our_uname)) {
+ strstr(dev->on_target_actions, search->action)) {
/* this device can only execute this action on the target node */
+
+ if(safe_str_neq(host, stonith_our_uname)) {
+ crm_trace("%s operation with %s can only be executed for localhost not %s",
+ search->action, dev->id, host);
+ goto search_report_results;
+ }
+
+ } else if(safe_str_eq(host, stonith_our_uname) && search->allow_suicide == FALSE) {
+ crm_trace("%s operation does not support self-fencing", search->action);
goto search_report_results;
}
@@ -1146,7 +1156,7 @@ search_devices(gpointer key, gpointer value, gpointer user_data)
#define DEFAULT_QUERY_TIMEOUT 20
static void
-get_capable_devices(const char *host, const char *action, int timeout, void *user_data,
+get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
void (*callback) (GList * devices, void *user_data))
{
struct device_search_s *search;
@@ -1199,6 +1209,7 @@ get_capable_devices(const char *host, const char *action, int timeout, void *use
* unregistered some how during the async search, we will get
* the correct number of replies. */
search->replies_needed = g_hash_table_size(device_list);
+ search->allow_suicide = suicide;
search->callback = callback;
search->user_data = user_data;
/* kick off the search */
@@ -1313,7 +1324,9 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
query->action = action ? strdup(action) : NULL;
query->call_options = call_options;
- get_capable_devices(target, action, timeout, query, stonith_query_capable_device_cb);
+ get_capable_devices(target, action, timeout,
+ is_set(call_options, st_opt_allow_suicide),
+ query, stonith_query_capable_device_cb);
}
#define ST_LOG_OUTPUT_MAX 512
@@ -1632,8 +1645,10 @@ stonith_fence(xmlNode * msg)
host = node->uname;
}
}
- get_capable_devices(host, cmd->action, cmd->default_timeout, cmd,
- stonith_fence_get_devices_cb);
+
+ /* If we get to here, then self-fencing is implicitly allowed */
+ get_capable_devices(host, cmd->action, cmd->default_timeout,
+ TRUE, cmd, stonith_fence_get_devices_cb);
}
return -EINPROGRESS;
diff --git a/fencing/remote.c b/fencing/remote.c
index 8c8df6d..399dce5 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -558,6 +558,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
+ int call_options = 0;
if (remote_op_list == NULL) {
remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
@@ -612,7 +613,9 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
- crm_element_value_int(request, F_STONITH_CALLOPTS, (int *)&(op->call_options));
+ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+ op->call_options = call_options;
+
crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid));
crm_trace("%s new stonith op: %s - %s of %s for %s",
@@ -662,7 +665,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma
op->target, op->id);
return op;
}
-
+
CRM_CHECK(op->action, return NULL);
if (stonith_topology_next(op) != pcmk_ok) {
@@ -695,6 +698,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma
crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
+ crm_xml_add_int(query, F_STONITH_CALLOPTS, op->call_options);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
@@ -964,29 +968,30 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
if (peer) {
int timeout_one = 0;
- xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
+ xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
- crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
- crm_xml_add(query, F_STONITH_TARGET, op->target);
- crm_xml_add(query, F_STONITH_ACTION, op->action);
- crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
- crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
- crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
- crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout);
+ crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
+ crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
+ crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
+ crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
+ crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
+ crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
+ crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
+ crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
if (device) {
timeout_one =
TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout);
crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host,
op->action, op->target, device, op->client_name, timeout_one);
- crm_xml_add(query, F_STONITH_DEVICE, device);
- crm_xml_add(query, F_STONITH_MODE, "slave");
+ crm_xml_add(remote_op, F_STONITH_DEVICE, device);
+ crm_xml_add(remote_op, F_STONITH_MODE, "slave");
} else {
timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout);
crm_info("Requesting that %s perform op %s %s for %s (%ds)",
peer->host, op->action, op->target, op->client_name, timeout_one);
- crm_xml_add(query, F_STONITH_MODE, "smart");
+ crm_xml_add(remote_op, F_STONITH_MODE, "smart");
}
op->state = st_exec;
@@ -995,9 +1000,9 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
}
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
- send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE);
+ send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
peer->tried = TRUE;
- free_xml(query);
+ free_xml(remote_op);
return;
} else if (op->owner == FALSE) {
@@ -1131,24 +1136,20 @@ process_remote_stonith_query(xmlNode * msg)
if (devices <= 0) {
/* If we're doing 'known' then we might need to fire anyway */
- crm_trace("Query result from %s (%d devices)", host, devices);
+ crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s",
+ op->replies, op->replies_expected, host,
+ op->target, op->action, devices, id);
if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) {
- crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
+ crm_info("All queries have arrived, continuing (%d, %d, %d, %s)",
+ op->replies_expected, active, op->replies, id);
call_remote_stonith(op, NULL);
}
return pcmk_ok;
-
- } else if (host_is_target) {
- if (op->call_options & st_opt_allow_suicide) {
- crm_trace("Allowing %s to potentialy fence itself", op->target);
- } else {
- crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide",
- op->target);
- return pcmk_ok;
- }
}
- crm_info("Query result %d of %d from %s (%d devices)", op->replies, op->replies_expected, host, devices);
+ crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
+ op->replies, op->replies_expected, host,
+ op->target, op->action, devices, id);
result = calloc(1, sizeof(st_query_result_t));
result->host = strdup(host);
result->devices = devices;
@@ -1206,10 +1207,6 @@ process_remote_stonith_query(xmlNode * msg)
crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices));
call_remote_stonith(op, result);
- } else if (safe_str_eq(op->action, "on")) {
- crm_trace("Unfencing %s", op->target);
- call_remote_stonith(op, result);
-
} else if(op->replies >= op->replies_expected || op->replies >= active) {
crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
call_remote_stonith(op, NULL);