commit 5fbd9decbca3f40a8b29e9d1ef5d158808f71cf6 Author: Andrew Beekhof Date: Wed Apr 9 14:40:30 2014 +1000 Fix: fencing: Filter self-fencing at the peers to allow unfencing to work correctly (cherry picked from commit e1e7d7fad2b5ade3c6e433cb1b631d071b8d5e70) Conflicts: fencing/commands.c fencing/remote.c diff --git a/fencing/commands.c b/fencing/commands.c index 14933a5..abbb1ad 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -60,6 +60,7 @@ struct device_search_s { int per_device_timeout; int replies_needed; int replies_received; + bool allow_suicide; void *user_data; void (*callback) (GList * devices, void *user_data); @@ -1072,8 +1073,17 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc if (dev->on_target_actions && search->action && - strstr(dev->on_target_actions, search->action) && safe_str_neq(host, stonith_our_uname)) { + strstr(dev->on_target_actions, search->action)) { /* this device can only execute this action on the target node */ + + if(safe_str_neq(host, stonith_our_uname)) { + crm_trace("%s operation with %s can only be executed for localhost not %s", + search->action, dev->id, host); + goto search_report_results; + } + + } else if(safe_str_eq(host, stonith_our_uname) && search->allow_suicide == FALSE) { + crm_trace("%s operation does not support self-fencing", search->action); goto search_report_results; } @@ -1146,7 +1156,7 @@ search_devices(gpointer key, gpointer value, gpointer user_data) #define DEFAULT_QUERY_TIMEOUT 20 static void -get_capable_devices(const char *host, const char *action, int timeout, void *user_data, +get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; @@ -1199,6 +1209,7 @@ get_capable_devices(const char *host, const char *action, int timeout, void *use * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); + search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* kick off the search */ @@ -1313,7 +1324,9 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int query->action = action ? strdup(action) : NULL; query->call_options = call_options; - get_capable_devices(target, action, timeout, query, stonith_query_capable_device_cb); + get_capable_devices(target, action, timeout, + is_set(call_options, st_opt_allow_suicide), + query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 @@ -1632,8 +1645,10 @@ stonith_fence(xmlNode * msg) host = node->uname; } } - get_capable_devices(host, cmd->action, cmd->default_timeout, cmd, - stonith_fence_get_devices_cb); + + /* If we get to here, then self-fencing is implicitly allowed */ + get_capable_devices(host, cmd->action, cmd->default_timeout, + TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; diff --git a/fencing/remote.c b/fencing/remote.c index 8c8df6d..399dce5 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -558,6 +558,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); + int call_options = 0; if (remote_op_list == NULL) { remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op); @@ -612,7 +613,9 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ - crm_element_value_int(request, F_STONITH_CALLOPTS, (int *)&(op->call_options)); + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + op->call_options = call_options; + crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid)); crm_trace("%s new stonith op: %s - %s of %s for %s", @@ -662,7 +665,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma op->target, op->id); return op; } - + CRM_CHECK(op->action, return NULL); if (stonith_topology_next(op) != pcmk_ok) { @@ -695,6 +698,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); + crm_xml_add_int(query, F_STONITH_CALLOPTS, op->call_options); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); @@ -964,29 +968,30 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) if (peer) { int timeout_one = 0; - xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); + xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); - crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); - crm_xml_add(query, F_STONITH_TARGET, op->target); - crm_xml_add(query, F_STONITH_ACTION, op->action); - crm_xml_add(query, F_STONITH_ORIGIN, op->originator); - crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); - crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); - crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout); + crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); + crm_xml_add(remote_op, F_STONITH_TARGET, op->target); + crm_xml_add(remote_op, F_STONITH_ACTION, op->action); + crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); + crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); + crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); + crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); + crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); if (device) { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout); crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); - crm_xml_add(query, F_STONITH_DEVICE, device); - crm_xml_add(query, F_STONITH_MODE, "slave"); + crm_xml_add(remote_op, F_STONITH_DEVICE, device); + crm_xml_add(remote_op, F_STONITH_MODE, "slave"); } else { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout); crm_info("Requesting that %s perform op %s %s for %s (%ds)", peer->host, op->action, op->target, op->client_name, timeout_one); - crm_xml_add(query, F_STONITH_MODE, "smart"); + crm_xml_add(remote_op, F_STONITH_MODE, "smart"); } op->state = st_exec; @@ -995,9 +1000,9 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) } op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); - send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE); + send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; - free_xml(query); + free_xml(remote_op); return; } else if (op->owner == FALSE) { @@ -1131,24 +1136,20 @@ process_remote_stonith_query(xmlNode * msg) if (devices <= 0) { /* If we're doing 'known' then we might need to fire anyway */ - crm_trace("Query result from %s (%d devices)", host, devices); + crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s", + op->replies, op->replies_expected, host, + op->target, op->action, devices, id); if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) { - crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies); + crm_info("All queries have arrived, continuing (%d, %d, %d, %s)", + op->replies_expected, active, op->replies, id); call_remote_stonith(op, NULL); } return pcmk_ok; - - } else if (host_is_target) { - if (op->call_options & st_opt_allow_suicide) { - crm_trace("Allowing %s to potentialy fence itself", op->target); - } else { - crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", - op->target); - return pcmk_ok; - } } - crm_info("Query result %d of %d from %s (%d devices)", op->replies, op->replies_expected, host, devices); + crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s", + op->replies, op->replies_expected, host, + op->target, op->action, devices, id); result = calloc(1, sizeof(st_query_result_t)); result->host = strdup(host); result->devices = devices; @@ -1206,10 +1207,6 @@ process_remote_stonith_query(xmlNode * msg) crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices)); call_remote_stonith(op, result); - } else if (safe_str_eq(op->action, "on")) { - crm_trace("Unfencing %s", op->target); - call_remote_stonith(op, result); - } else if(op->replies >= op->replies_expected || op->replies >= active) { crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies); call_remote_stonith(op, NULL);