commit 5fbd9decbca3f40a8b29e9d1ef5d158808f71cf6
Author: Andrew Beekhof <andrew@beekhof.net>
Date:   Wed Apr 9 14:40:30 2014 +1000

    Fix: fencing: Filter self-fencing at the peers to allow unfencing to work correctly
    
    (cherry picked from commit e1e7d7fad2b5ade3c6e433cb1b631d071b8d5e70)
    
    Conflicts:
    	fencing/commands.c
    	fencing/remote.c

diff --git a/fencing/commands.c b/fencing/commands.c
index 14933a5..abbb1ad 100644
--- a/fencing/commands.c
+++ b/fencing/commands.c
@@ -60,6 +60,7 @@ struct device_search_s {
     int per_device_timeout;
     int replies_needed;
     int replies_received;
+    bool allow_suicide;
 
     void *user_data;
     void (*callback) (GList * devices, void *user_data);
@@ -1072,8 +1073,17 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
 
     if (dev->on_target_actions &&
         search->action &&
-        strstr(dev->on_target_actions, search->action) && safe_str_neq(host, stonith_our_uname)) {
+        strstr(dev->on_target_actions, search->action)) {
         /* this device can only execute this action on the target node */
+
+        if(safe_str_neq(host, stonith_our_uname)) {
+            crm_trace("%s operation with %s can only be executed for localhost not %s",
+                      search->action, dev->id, host);
+            goto search_report_results;
+        }
+
+    } else if(safe_str_eq(host, stonith_our_uname) && search->allow_suicide == FALSE) {
+        crm_trace("%s operation does not support self-fencing", search->action);
         goto search_report_results;
     }
 
@@ -1146,7 +1156,7 @@ search_devices(gpointer key, gpointer value, gpointer user_data)
 
 #define DEFAULT_QUERY_TIMEOUT 20
 static void
-get_capable_devices(const char *host, const char *action, int timeout, void *user_data,
+get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
                     void (*callback) (GList * devices, void *user_data))
 {
     struct device_search_s *search;
@@ -1199,6 +1209,7 @@ get_capable_devices(const char *host, const char *action, int timeout, void *use
      * unregistered some how during the async search, we will get
      * the correct number of replies. */
     search->replies_needed = g_hash_table_size(device_list);
+    search->allow_suicide = suicide;
     search->callback = callback;
     search->user_data = user_data;
     /* kick off the search */
@@ -1313,7 +1324,9 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
     query->action = action ? strdup(action) : NULL;
     query->call_options = call_options;
 
-    get_capable_devices(target, action, timeout, query, stonith_query_capable_device_cb);
+    get_capable_devices(target, action, timeout,
+                        is_set(call_options, st_opt_allow_suicide),
+                        query, stonith_query_capable_device_cb);
 }
 
 #define ST_LOG_OUTPUT_MAX 512
@@ -1632,8 +1645,10 @@ stonith_fence(xmlNode * msg)
                 host = node->uname;
             }
         }
-        get_capable_devices(host, cmd->action, cmd->default_timeout, cmd,
-                            stonith_fence_get_devices_cb);
+
+        /* If we get to here, then self-fencing is implicitly allowed */
+        get_capable_devices(host, cmd->action, cmd->default_timeout,
+                            TRUE, cmd, stonith_fence_get_devices_cb);
     }
 
     return -EINPROGRESS;
diff --git a/fencing/remote.c b/fencing/remote.c
index 8c8df6d..399dce5 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -558,6 +558,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 {
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
+    int call_options = 0;
 
     if (remote_op_list == NULL) {
         remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
@@ -612,7 +613,9 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 
     op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
     op->request = copy_xml(request);    /* TODO: Figure out how to avoid this */
-    crm_element_value_int(request, F_STONITH_CALLOPTS, (int *)&(op->call_options));
+    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
+    op->call_options = call_options;
+
     crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid));
 
     crm_trace("%s new stonith op: %s - %s of %s for %s",
@@ -662,7 +665,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma
                    op->target, op->id);
         return op;
     }
-    
+
     CRM_CHECK(op->action, return NULL);
 
     if (stonith_topology_next(op) != pcmk_ok) {
@@ -695,6 +698,7 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma
     crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
     crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
+    crm_xml_add_int(query, F_STONITH_CALLOPTS, op->call_options);
 
     send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
     free_xml(query);
@@ -964,29 +968,30 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
 
     if (peer) {
         int timeout_one = 0;
-        xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
+        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
 
-        crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
-        crm_xml_add(query, F_STONITH_TARGET, op->target);
-        crm_xml_add(query, F_STONITH_ACTION, op->action);
-        crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
-        crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
-        crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
-        crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout);
+        crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
+        crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
+        crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
+        crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
+        crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
+        crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
+        crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
+        crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
 
         if (device) {
             timeout_one =
                 TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout);
             crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host,
                      op->action, op->target, device, op->client_name, timeout_one);
-            crm_xml_add(query, F_STONITH_DEVICE, device);
-            crm_xml_add(query, F_STONITH_MODE, "slave");
+            crm_xml_add(remote_op, F_STONITH_DEVICE, device);
+            crm_xml_add(remote_op, F_STONITH_MODE, "slave");
 
         } else {
             timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout);
             crm_info("Requesting that %s perform op %s %s for %s (%ds)",
                      peer->host, op->action, op->target, op->client_name, timeout_one);
-            crm_xml_add(query, F_STONITH_MODE, "smart");
+            crm_xml_add(remote_op, F_STONITH_MODE, "smart");
         }
 
         op->state = st_exec;
@@ -995,9 +1000,9 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
         }
         op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
 
-        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE);
+        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
         peer->tried = TRUE;
-        free_xml(query);
+        free_xml(remote_op);
         return;
 
     } else if (op->owner == FALSE) {
@@ -1131,24 +1136,20 @@ process_remote_stonith_query(xmlNode * msg)
 
     if (devices <= 0) {
         /* If we're doing 'known' then we might need to fire anyway */
-        crm_trace("Query result from %s (%d devices)", host, devices);
+        crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s",
+                  op->replies, op->replies_expected, host,
+                  op->target, op->action, devices, id);
         if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) {
-            crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
+            crm_info("All queries have arrived, continuing (%d, %d, %d, %s)",
+                     op->replies_expected, active, op->replies, id);
             call_remote_stonith(op, NULL);
         }
         return pcmk_ok;
-
-    } else if (host_is_target) {
-        if (op->call_options & st_opt_allow_suicide) {
-            crm_trace("Allowing %s to potentialy fence itself", op->target);
-        } else {
-            crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide",
-                     op->target);
-            return pcmk_ok;
-        }
     }
 
-    crm_info("Query result %d of %d from %s (%d devices)", op->replies, op->replies_expected, host, devices);
+    crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
+             op->replies, op->replies_expected, host,
+             op->target, op->action, devices, id);
     result = calloc(1, sizeof(st_query_result_t));
     result->host = strdup(host);
     result->devices = devices;
@@ -1206,10 +1207,6 @@ process_remote_stonith_query(xmlNode * msg)
             crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices));
             call_remote_stonith(op, result);
 
-        } else if (safe_str_eq(op->action, "on")) {
-            crm_trace("Unfencing %s", op->target);
-            call_remote_stonith(op, result);
-
         } else if(op->replies >= op->replies_expected || op->replies >= active) {
             crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
             call_remote_stonith(op, NULL);